FFmpeg 音频重采样的两种方法libavcodec和libswresample

程序员文章站 2022-07-05 16:11:54

...

对于很多播放器，在输出时会固定为一种格式（如44100hz，双声道，16bit signed），因为多数设备能够支持这些格式。这种情况下对于不同的多种输入源，即需要进行音频重采样。

1、libavcodec

libavcodec提供了重采样相关接口，该接口较老，一般配合FFmpeg 2版本的解码接口avcodec_decode_audio3使用，将解码数据转换为指定格式。新版本不建议使用该接口。

接口说明：
相关函数头文件定义，接口很清晰：
1）函数av_audio_resample_init()用来初始化重采样的参数，前6个参数很好理解，

@param output_channels 重采样后声道数
@param input_channels 原声道数
@param output_rate 重采样后采样率
@param input_rate 原采样率
@param sample_fmt_out 重采样后音频数据格式
@param sample_fmt_in 原采样数据格式
后4个参数基本是使用缺省参数（我也不太清楚这几个值的使用），分别为：16, 10, 0, 1

2）函数audio_resample()用来重采样，前3个参数分别为上下文/输出数据/输入数据，注意最后一个参数是指“原数据的采样个数”，而不是字节数。函数返回值也是重采样之后的采样个数。

3）函数audio_resample_close()用来清理重采样时分配的资源。

/** Resampler state; only forward-declared here, obtained from av_audio_resample_init(). */
typedef struct ReSampleContext ReSampleContext;
/**
 *  Initialize audio resampling context.
 *
 * The example program in this article passes 16, 10, 0, 1 for the last four
 * parameters (filter_length, log2_phase_count, linear, cutoff).
 *
 * @param output_channels  number of output channels
 * @param input_channels   number of input channels
 * @param output_rate      output sample rate
 * @param input_rate       input sample rate
 * @param sample_fmt_out   requested output sample format
 * @param sample_fmt_in    input sample format
 * @param filter_length    length of each FIR filter in the filterbank relative to the cutoff frequency
 * @param log2_phase_count log2 of the number of entries in the polyphase filterbank
 * @param linear           if 1 then the used FIR filter will be linearly interpolated
 *                         between the 2 closest, if 0 the closest will be used
 * @param cutoff           cutoff frequency, 1.0 corresponds to half the output sampling rate
 * @return allocated ReSampleContext, NULL if error occurred
 */
attribute_deprecated
ReSampleContext *av_audio_resample_init(int output_channels, int input_channels,
                                        int output_rate, int input_rate,
                                        enum AVSampleFormat sample_fmt_out,
                                        enum AVSampleFormat sample_fmt_in,
                                        int filter_length, int log2_phase_count,
                                        int linear, double cutoff);

/**
 * Resample a buffer of audio using a context created with
 * av_audio_resample_init().
 *
 * @param s          resample context
 * @param output     buffer that receives the resampled data
 * @param input      buffer holding the source data
 * @param nb_samples number of samples in input (a sample count, not a byte count)
 * @return number of samples after resampling (again a sample count, not bytes)
 */
attribute_deprecated
int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples);

/**
 * Free resample context.
 *
 * @param s a non-NULL pointer to a resample context previously
 *          created with av_audio_resample_init()
 */
attribute_deprecated
void audio_resample_close(ReSampleContext *s);

示例代码：
在FFmpeg 3.2上，使用该接口其实并不方便，毕竟这个接口是配合老的音频解码接口avcodec_decode_audio3使用。而新的音频解码接口（avcodec_decode_audio4或者是更新的send/receive接口），会将解码后的音频数据存放在AVFrame 结构体的data中，对于planar类型的数据，是每个声道分别存在一个数组中的，如data[0],data[1],data[2]…这就有一个问题，重采样时，还得再重新把各个声道数据合并到一起，比较麻烦，所以下面示例中，如果是planar类型的数据，我们只取其中一条声道做重采样。
需要注意的是，planar类型的数据以一条声道来采样，需要转换为对应的非planar类型，否则该接口会转换出错。

（代码只是简单的接口示例，很多异常未处理）

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#ifdef __cplusplus
extern "C"
{
#endif
#define __STDC_CONSTANT_MACROS
#ifdef _STDINT_H
#undef _STDINT_H
#endif
#include <stdint.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#ifdef __cplusplus
};
#endif
#define MAX_AUDIO_FRAME_SIZE 192000 //48khz 16bit audio 2 channels

int main(int argc, char **argv){
    if(argc < 2){
        return -1;
    }
    const char* in_file = argv[1];

    AVFormatContext *fctx = NULL;
    AVCodecContext *cctx = NULL;
    AVCodec *acodec = NULL;
	
    FILE *audio_dst_file1 = fopen("./before_resample.pcm", "wb");
    FILE *audio_dst_file2 = fopen("./after_resample.pcm", "wb");

    av_register_all();
    avformat_open_input(&fctx, in_file, NULL, NULL);
    avformat_find_stream_info(fctx, NULL);
    //get audio index
    int aidx = av_find_best_stream(fctx, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
    printf("get aidx[%d]!!!\n",aidx);
    //open audio codec
    AVCodecParameters *codecpar = fctx->streams[aidx]->codecpar;
    acodec = avcodec_find_decoder(codecpar->codec_id);
    cctx = avcodec_alloc_context3(acodec);
    avcodec_parameters_to_context(cctx, codecpar);
    avcodec_open2(cctx, acodec, NULL);

    //init resample
    ReSampleContext * resample_ctx = NULL;
    int output_channels = 2;
    int output_rate = 48000;
    int input_channels = cctx->channels;
    int input_rate = cctx->sample_rate;
    int input_sample_fmt = cctx->sample_fmt;
    if(av_sample_fmt_is_planar((AVSampleFormat)input_sample_fmt)){//if planar, we just use one channel
    	input_sample_fmt = input_sample_fmt - (int)AV_SAMPLE_FMT_U8P;
        input_channels = 1;
    }
    AVSampleFormat output_sample_fmt = AV_SAMPLE_FMT_S16;
    printf("input_channels[%d=>%d],input_rate[%d=>%d],input_sample_fmt[%d=>%d]\n",
                    cctx->channels,input_channels,cctx->sample_rate,input_rate,cctx->sample_fmt,input_sample_fmt);
    resample_ctx = av_audio_resample_init(output_channels, input_channels, output_rate, input_rate, 
  											output_sample_fmt,(AVSampleFormat)input_sample_fmt,16, 10, 0, 1);
    if(!resample_ctx){
    	printf("av_audio_resample_init fail!!!\n");
    	return -1;
    }
	
    AVPacket *pkt =av_packet_alloc();
    AVFrame *frame = av_frame_alloc();
	
    short *out_buffer=(short *)av_malloc(MAX_AUDIO_FRAME_SIZE);
    int size = 0;
    
    while(av_read_frame(fctx,pkt) == 0){//DEMUX
        if(pkt->stream_index == aidx){
            avcodec_send_packet(cctx, pkt);
            while(1){
            	int ret = avcodec_receive_frame(cctx, frame);
            	if(ret != 0){
                    break;
            	}else{
                    //before resample
                    size = frame->nb_samples * av_get_bytes_per_sample((AVSampleFormat)frame->format);
                    if(frame->data[0] != NULL){
                        fwrite(frame->data[0], 1, size, audio_dst_file1);
                        memset(out_buffer,0x00,sizeof(out_buffer));
                        size = audio_resample(resample_ctx, out_buffer, (short *)frame->data[0], frame->nb_samples);
                        size = size* av_get_bytes_per_sample(output_sample_fmt) * output_channels ;//samples * byte * channels
                        if(size > 0){
                            fwrite(out_buffer, 1, size, audio_dst_file2);
                        }
                    }
            	}
            	av_frame_unref(frame);
            }
        }
        else{
            //printf("not audio frame!!!\n");
            av_packet_unref(pkt);
            continue;
        }
        av_packet_unref(pkt);
    }

    //close
    audio_resample_close(resample_ctx);
    av_packet_free(&pkt);
    av_frame_free(&frame);
    avcodec_close(cctx);
    avformat_close_input(&fctx);
    av_free(out_buffer);
    fclose(audio_dst_file1);
    fclose(audio_dst_file2);

    return 0;
}

采样结果，上面的例子中，保存了采样前和采样后的PCM。
我们拿一个源中的一条声道，采样率为48000，样本格式为AV_SAMPLE_FMT_FLTP。

按照示例代码，重采样后采样率为48000（48khz一般为DVD的采样率），双声道，采样数据类型为AV_SAMPLE_FMT_S16，即变成了非planar类型的两声道。
这个例子中，声道数/样本格式都发生变化，采样率刚好一样（当然，也可以选取其他频率来验证采样率的变化是否正确，如44100）

2、libswresample

libswresample接口提供了更为方便的重采样方法。

接口说明：
我只介绍几个重要的函数，其他的可以参考对应头文件libswresample/swresample.h
1）函数swr_alloc_set_opts()，申请重采样上下文，并可以将相关参数进行设置。

@param s 重采样上下文，如果为NULL，函数会自己生成
@param out_ch_layout 重采样的声道layout
@param out_sample_fmt 重采样的数据格式
@param out_sample_rate 重采样的采样率
@param in_ch_layout 源声道layout
@param in_sample_fmt 源数据格式
@param in_sample_rate 源采样率

/**
 * Allocate SwrContext if needed and set/reset common parameters.
 *
 * @param s               existing Swr context if available, or NULL if not
 * @param out_ch_layout   output channel layout (AV_CH_LAYOUT_*)
 * @param out_sample_fmt  output sample format (AV_SAMPLE_FMT_*).
 * @param out_sample_rate output sample rate (frequency in Hz)
 * @param in_ch_layout    input channel layout (AV_CH_LAYOUT_*)
 * @param in_sample_fmt   input sample format (AV_SAMPLE_FMT_*).
 * @param in_sample_rate  input sample rate (frequency in Hz)
 * @param log_offset      logging level offset
 * @param log_ctx         parent logging context, can be NULL
 *
 * @see swr_init(), swr_free()
 * @return NULL on error, allocated context otherwise
 */
struct SwrContext *swr_alloc_set_opts(struct SwrContext *s,
                                      int64_t out_ch_layout, enum AVSampleFormat out_sample_fmt, int out_sample_rate,
                                      int64_t  in_ch_layout, enum AVSampleFormat  in_sample_fmt, int  in_sample_rate,
                                      int log_offset, void *log_ctx);

2）函数 int swr_init(struct SwrContext *s); // 初始化上下文。

3）函数 void swr_free(struct SwrContext **s); // 释放上下文空间。

4）函数 int swr_convert(...); // 针对每一帧音频的处理，把一帧帧的音频作相应的重采样。

@param s 音频重采样的上下文
@param out 重采样后的数据
@param out_count 输出缓冲区中每个通道可容纳的样本数量，注意不是字节数
@param in 重采样前的源数据
@param in_count 输入的单通道的样本数量

/** Convert audio.
 *
 * in and in_count can be set to 0 to flush the last few samples out at the
 * end.
 *
 * If more input is provided than output space, then the input will be buffered.
 * You can avoid this buffering by using swr_get_out_samples() to retrieve an
 * upper bound on the required number of output samples for the given number of
 * input samples. Conversion will run directly without copying whenever possible.
 *
 * @param s         allocated Swr context, with parameters set
 * @param out       output buffers, only the first one need be set in case of packed audio
 * @param out_count amount of space available for output in samples per channel
 * @param in        input buffers, only the first one need to be set in case of packed audio
 * @param in_count  number of input samples available in one channel
 *
 * @return number of samples output per channel, negative value on error
 */
int swr_convert(struct SwrContext *s, uint8_t **out, int out_count,
                                const uint8_t **in , int in_count);

示例代码：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#ifdef __cplusplus
extern "C"
{
#endif
#define __STDC_CONSTANT_MACROS
#ifdef _STDINT_H
#undef _STDINT_H
#endif
#include <stdint.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswresample/swresample.h>
#ifdef __cplusplus
};
#endif
#define MAX_AUDIO_FRAME_SIZE 192000 //48khz 16bit audio 2 channels

int main(int argc, char **argv){
    if(argc < 2){
        return -1;
    }
    const char* in_file = argv[1];

    AVFormatContext *fctx = NULL;
    AVCodecContext *cctx = NULL;
    AVCodec *acodec = NULL;
	
    FILE *audio_dst_file1 = fopen("./before_resample.pcm", "wb");
    FILE *audio_dst_file2 = fopen("./after_resample.pcm", "wb");

    av_register_all();
    avformat_open_input(&fctx, in_file, NULL, NULL);
    avformat_find_stream_info(fctx, NULL);
    //get audio index
    int aidx = av_find_best_stream(fctx, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
    printf("get aidx[%d]!!!\n",aidx);
    //open audio codec
    AVCodecParameters *codecpar = fctx->streams[aidx]->codecpar;
    acodec = avcodec_find_decoder(codecpar->codec_id);
    cctx = avcodec_alloc_context3(acodec);
    avcodec_parameters_to_context(cctx, codecpar);
    avcodec_open2(cctx, acodec, NULL);

    //init resample
    int output_channels = 2;
    int output_rate = 48000;
    int input_channels = cctx->channels;
    int input_rate = cctx->sample_rate;
    AVSampleFormat input_sample_fmt = cctx->sample_fmt;
    AVSampleFormat output_sample_fmt = AV_SAMPLE_FMT_S16;
    printf("channels[%d=>%d],rate[%d=>%d],sample_fmt[%d=>%d]\n",
        input_channels,output_channels,input_rate,output_rate,input_sample_fmt,output_sample_fmt);
    
    SwrContext* resample_ctx = NULL;
    resample_ctx = swr_alloc_set_opts(resample_ctx, av_get_default_channel_layout(output_channels),output_sample_fmt,output_rate,
                            av_get_default_channel_layout(input_channels),input_sample_fmt, input_rate,0,NULL);
    if(!resample_ctx){
        printf("av_audio_resample_init fail!!!\n");
        return -1;
    }
    swr_init(resample_ctx);
    
    AVPacket *pkt =av_packet_alloc();
    AVFrame *frame = av_frame_alloc();
    int size = 0;
    uint8_t* out_buffer = (uint8_t*)av_malloc(MAX_AUDIO_FRAME_SIZE);
    
    while(av_read_frame(fctx,pkt) == 0){//DEMUX
        if(pkt->stream_index == aidx){
            avcodec_send_packet(cctx, pkt);
            while(1){
            	int ret = avcodec_receive_frame(cctx, frame);
            	if(ret != 0){
                    break;
            	}else{
                    //before resample
                    size = frame->nb_samples * av_get_bytes_per_sample((AVSampleFormat)frame->format);
                    if(frame->data[0] != NULL){
                        fwrite(frame->data[0], 1, size, audio_dst_file1);
                    }
                    //resample
                    memset(out_buffer,0x00,sizeof(out_buffer));
                    int out_samples = swr_convert(resample_ctx,&out_buffer,frame->nb_samples,(const uint8_t **)frame->data,frame->nb_samples);
                    if(out_samples > 0){
                        av_samples_get_buffer_size(NULL,output_channels ,out_samples, output_sample_fmt, 1);//out_samples*output_channels*av_get_bytes_per_sample(output_sample_fmt);
                        fwrite(out_buffer, 1, size, audio_dst_file2);
                    }
            	}
            	av_frame_unref(frame);
            }
        }
        else{
            //printf("not audio frame!!!\n");
            av_packet_unref(pkt);
            continue;
        }
        av_packet_unref(pkt);
    }

    //close
    swr_free(&resample_ctx);
    av_packet_free(&pkt);
    av_frame_free(&frame);
    avcodec_close(cctx);
    avformat_close_input(&fctx);
    av_free(out_buffer);
    fclose(audio_dst_file1);
    fclose(audio_dst_file2);

    return 0;
}

对比libavcodec提供的方法，libswresample可以很方便的将planar类型的数据都放到重采样流程中，将多个声道采样为我们想要的输出类型。
特别是对于有些planar类型的源，左右声道不一样的时候，使用libavcodec的重采样流程还需要对多个声道数据进行合并处理。而libswresample则可以省去这些麻烦。
如下图，使用libswresample对一个左右声道不一样的源进行重采样后，输出为采样率为48000hz，双声道，采样数据类型为AV_SAMPLE_FMT_S16。从图中，我们可以看到，左右声道是明显不同。
如果按照上面第一个示例代码，则只能对其中一条声道重采样，这样输出的结果左右声道一样，对于很多源，可能就失去了一些细腻的声道效果。

————————————————
版权声明：本文为CSDN博主「wuscblog」的原创文章，遵循 CC 4.0 BY-SA 版权协议，转载请附上原文出处链接及本声明。
原文链接：https://blog.csdn.net/myvest/article/details/89442000