欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

将 H264 + PCMA 转为MP4格式(RTSP协议)

程序员文章站 2022-07-12 22:18:51
...

 

问题:

通过rtsp交互,解析RTP流(h264视频+alaw(pcma)音频), 将其保存为MP4格式。

 

解决方案:

使用MP4V2 ,版本2.0.0, VC2010可编译。(还有一种gpac库,听说支持h265,未实验)

Mp4V2下载地址: https://code.google.com/archive/p/mp4v2/downloads  ,  文件名mp4v2-2.0.0.tar.bz2

参考文章: 使用mp4v2将H264+AAC合成mp4文件

参考文章: 将RTSP流录制为mp4文件

参考文章:  linux下利用mp4v2库将h264和aac文件封装成MP4

参考文章: Android音视频系列:视频容器操作篇 -- mp4容器打包实现

参考文章: Mp4V2调试经验记录   (视频或者音画同步的主要参数是duration)

参考文章:mp4v2 接口函数  : 其中提到了一些需要注意的地方,我似乎并没有全部顾及。

 

PS: 考虑过FFmpeg,但是太大,Windows下编译麻烦,不会裁剪。Mp4V2比较轻量级,适合做一个SDK。

 

前期:

建议先对MP4格式有一点基本了解, 看看一些MP4格式的文档,有一个基本概念。


参考文章: mp4文件格式解析 , 介绍详细,其中推荐mp4v2 和 gpac,和一个在线MP4格式解析器,

参考文章(代码): 逐个Box解析MP4文件的代码,挺好的。 MP4 file analyzer  (This is a console application, written by MSVC 2008.It will display mp4 file content in human readable format.)

 

 

套路:


// Outline of the muxing flow (illustrative snippet: _fps, _width, _height,
// pH264, pCMA, len and key_frame are not declared here).

// Create the MP4 file
MP4FileHandle file = MP4CreateEx("d:\\test.mp4", MP4_CREATE_64BIT_DATA | MP4_CREATE_64BIT_TIME);


MP4SetTimeScale(file, 90000);

MP4TrackId video = MP4AddH264VideoTrack(file, 90000, 90000/_fps, _width, _height,
                                                0x64, //sps[1] AVCProfileIndication
                                                0x00, //sps[2] profile_compat
                                                0x1f, //sps[3] AVCLevelIndication
                                                3); // 4 bytes length before each NAL unit
MP4SetVideoProfileLevel(file, 0x7F);

// MP4AddALawAudioTrack2 is a new function added to the mp4v2 sources (library rebuilt)
MP4TrackId audio = MP4AddALawAudioTrack2(file,
                                         8000,   //timescale
                                         8000*40/1000);   //sampleDuration.  40ms

MP4SetTrackIntegerProperty(file,audio, "mdia.minf.stbl.stsd.alaw.channels",1);
MP4SetAudioProfileLevel(file, 0x2);




while(1)
{
    // Update the SPS
    // NOTE(review): the pointer skips the first 4 bytes but the full `len` is
    // passed; the remaining length would be len - 4 -- confirm.
   MP4AddH264SequenceParameterSet(file, video, (const uint8_t*)(pH264+4), len);

    // Update the PPS (same NOTE(review) about `len` as for the SPS call)
   MP4AddH264PictureParameterSet(file, video, (const uint8_t*)(pH264+4), len);

    // Write the H264 sample (the first four bytes need special handling);
    // the last argument is 1 for key frames and 0 otherwise
   if(key_frame)
   {
     MP4WriteSample(file, video, (const uint8_t* )pH264, len, MP4_INVALID_DURATION, 0, 
          1);
   }else{
     MP4WriteSample(file, video, (const uint8_t* )pH264, len, MP4_INVALID_DURATION, 0, 
          0);
    }

    // Write the PCMA sample
   MP4WriteSample(file, audio, (const uint8_t* )pCMA, len , MP4_INVALID_DURATION, 0, 1);

}

MP4Close(file);

 

PS:      注意写入 H264 Sample 时,H264 流中每个 NAL 的头四个字节是起始码 0x00000001,而 MP4 的 H264 track 要求头四个字节是该 NAL 的长度(不含这四个字节本身),并且是大端字节序。所以写入前需要进行如下更改:

// Overwrite the first four bytes of the buffer with (len - 4), converted by htonl().
uint32_t* pSize = (uint32_t*)pH264 ;
*pSize = htonl(len - 4);

 

PS:   源码中函数 AddALawAudioTrack里面计算 

uint32_t fixedSampleDuration = (timeScale * 20)/1000; // 20mSec/Sample

然而我这里实际的音频是1秒25帧,即每帧间隔40ms,而不是20ms,故而需要手动添加一个新的接口(即上文的 MP4AddALawAudioTrack2)。

 

PS:   音频和视频同步没有特别处理,没考虑。

PS:   不确定是否在 SPS、PPS 每次变更时都需要调用 MP4AddH264SequenceParameterSet 和 MP4AddH264PictureParameterSet,也不确定这样做会有什么影响。

 

 

简单实验结果: 

测试步骤1  :  VLC可播放, 有声音。

测试步骤2: 手机端微信可播放。

测试步骤3: 桌面端微信不可播放。 (猜测可能是音频格式需要AAC格式)

测试步骤4: Html5 可播放,有声音。

测试步骤5: Windows Media Player可播放, 但是没声音。

 

困扰: 发现MP4文件中的mdat 数据特别长, 不知道是否有影响; 另外, 生成的这份MP4文件无法用格式工厂转换。

 

其他:

感觉MP4似乎主要用来打包H264和AAC的,而我用的音频是PCMA,不知道有没有兼容性问题。

mp4格式好复杂,结构可变,Box 结构体的种类又多。我曾经试图一个字节一个字节地研究,后来因为太复杂放弃了。

 

附图(图片来自网络):

将 H264 + PCMA 转为MP4格式(RTSP协议)

 

 

 

将 H264 + PCMA 转为MP4格式(RTSP协议)

 

参考源码:  (来源: https://github.com/Thinkerfans/lib-mp4v2/tree/master/mp4v2 )

 

lib-mp4v2/mp4v2/mp4record.h

//
//  mp4record.h
//  RTSP_Player
//
//  Created by apple on 15/4/7.
//  Copyright (c) 2015年 thinker. All rights reserved.
//

#ifndef __RTSP_Player__mp4record__
#define __RTSP_Player__mp4record__

#include "mp4v2.h"

/* Codes mp4VEncode uses to classify an incoming NAL unit. */
#define  _NALU_SPS_  0
#define  _NALU_PPS_  1
#define  _NALU_I_    2
#define  _NALU_P_    3


/**
 * Allocate the global recording context and create the MP4 file `filename`.
 * `width` and `height` are stored and later used when the video track is added.
 * Returns 0 on success, -1 on failure.
 */
int initMp4Encoder(const char * filename,int width,int height);

/**
 * Feed one H.264 NAL unit, prefixed with the 4-byte start code 00 00 00 01,
 * to the recorder. `len` is the size of `data` including the start code.
 * Returns -1 on failure, or when no video track exists yet and the unit is
 * not an SPS; returns 0 otherwise. The buffer may be modified in place.
 */
int mp4VEncode(uint8_t * data ,int len);

/**
 * Feed one audio sample of `len` bytes to the recorder.
 * Returns -1 when the sample is rejected (e.g. the video track has not been
 * created yet), 0 otherwise.
 */
int mp4AEncode(uint8_t * data ,int len);

/** Close the MP4 file if one is open and free the global recording context. */
void closeMp4Encoder();


#endif /* defined(__RTSP_Player__mp4record__) */

 

lib-mp4v2/mp4v2/mp4record.c

//
//  mp4record.c
//  RTSP_Player
//
//  Created by apple on 15/4/7.
//  Copyright (c) 2015年 thinker. All rights reserved.
//


#include "mp4record.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>


/* State of the single recording session shared by all mp4record functions. */
typedef struct MP4V2_CONTEXT{

    int m_vWidth;               /* video width handed to initMp4Encoder */
    int m_vHeight;              /* video height handed to initMp4Encoder */
    int m_vFrateR;              /* video frame rate; divides the timescale to get the sample duration */
    int m_vTimeScale;           /* timescale used for the movie and the video track */
    MP4FileHandle m_mp4FHandle; /* handle of the MP4 file being written */
    MP4TrackId m_vTrackId;      /* video track id */
    MP4TrackId m_aTrackId;      /* audio track id */
    double m_vFrameDur;         /* running counter bumped by mp4AEncode; only read by commented-out code */

} MP4V2_CONTEXT;

/* The one global context; NULL while no recording session exists. */
MP4V2_CONTEXT * recordCtx = NULL;


int initMp4Encoder(const char * filename,int width,int height){
    
    int ret = -1;
    recordCtx = malloc(sizeof(struct MP4V2_CONTEXT));
    if (!recordCtx) {
        printf("error : malloc context \n");
        return ret;
    }
    
    recordCtx->m_vWidth = width;
    recordCtx->m_vHeight = height;
    recordCtx->m_vFrateR = 25;
    recordCtx->m_vTimeScale = 90000;
    recordCtx->m_vFrameDur = 300;
    recordCtx->m_vTrackId = 0;
    recordCtx->m_aTrackId = 0;
    
    recordCtx->m_mp4FHandle = MP4Create(filename,0);
    if (recordCtx->m_mp4FHandle == MP4_INVALID_FILE_HANDLE) {
        printf("error : MP4Create  \n");
        return ret;
    }
     MP4SetTimeScale(recordCtx->m_mp4FHandle, recordCtx->m_vTimeScale);
    //------------------------------------------------------------------------------------- audio track
//    recordCtx->m_aTrackId = MP4AddAudioTrack(recordCtx->m_mp4FHandle, 44100, 1024, MP4_MPEG4_AUDIO_TYPE);
//    if (recordCtx->m_aTrackId == MP4_INVALID_TRACK_ID){
//        printf("error : MP4AddAudioTrack  \n");
//        return ret;
//    }
//
//    MP4SetAudioProfileLevel(recordCtx->m_mp4FHandle, 0x2);
//    uint8_t aacConfig[2] = {18,16};
//    MP4SetTrackESConfiguration(recordCtx->m_mp4FHandle,recordCtx->m_aTrackId,aacConfig,2);
//    printf("ok  : initMp4Encoder file=%s  \n",filename);

    return 0;
}
/* Store `value` into dst[0..3], most significant byte first. */
static void mp4recPutBe32(uint8_t * dst, uint32_t value){
    dst[0] = (uint8_t)(value >> 24);
    dst[1] = (uint8_t)(value >> 16);
    dst[2] = (uint8_t)(value >> 8);
    dst[3] = (uint8_t)(value & 0xff);
}

/*
 * Classify a NAL unit that starts with the 4-byte start code 00 00 00 01.
 * Returns one of the _NALU_*_ codes, or -1 for a missing start code or a NAL
 * type that is not handled. The caller guarantees at least 5 readable bytes.
 */
static int mp4recClassifyNalu(const uint8_t * nalu){
    if (nalu[0] != 0 || nalu[1] != 0 || nalu[2] != 0 || nalu[3] != 1) {
        return -1;
    }
    /* nal_unit_type is the low 5 bits of the NAL header; the bits above it
       (nal_ref_idc) vary between encoders and must not affect the match. */
    switch (nalu[4] & 0x1F) {
        case 7:  return _NALU_SPS_;
        case 8:  return _NALU_PPS_;
        case 5:  return _NALU_I_;   /* IDR slice */
        case 1:  return _NALU_P_;   /* non-IDR slice */
        default: return -1;
    }
}

/**
 * Feed one H.264 NAL unit to the recorder.
 *
 * The unit must start with the 4-byte start code 00 00 00 01. The first SPS
 * creates the video track; SPS and PPS are stored as parameter sets; IDR and
 * non-IDR slices are written as samples with the start code replaced by the
 * big-endian payload length, because the track is created with 4-byte length
 * prefixes.
 *
 * @param _naluData  NAL unit including its start code. For non-IDR slices the
 *                   first four bytes are overwritten in place with the length;
 *                   IDR slices are copied and the buffer is left untouched.
 * @param _naluSize  size of _naluData in bytes, start code included
 * @return -1 on invalid input, when no video track exists yet and the unit is
 *         not an SPS, or when mp4v2 reports a failure; 0 otherwise (including
 *         NAL types that are ignored).
 */
int mp4VEncode(uint8_t * _naluData ,int _naluSize){

    /* Start code plus one NAL header byte is the minimum that can be inspected. */
    if (recordCtx == NULL || _naluData == NULL || _naluSize < 5) {
        return -1;
    }

    const int index = mp4recClassifyNalu(_naluData);

    /* Nothing but an SPS can be accepted before the video track exists. */
    if (index != _NALU_SPS_ && recordCtx->m_vTrackId == MP4_INVALID_TRACK_ID) {
        return -1;
    }

    const uint32_t payloadLen = (uint32_t)(_naluSize - 4);

    switch (index) {
        case _NALU_SPS_:
            if (recordCtx->m_vTrackId == MP4_INVALID_TRACK_ID) {
                /* The track parameters are read from SPS bytes 1..3. */
                if (_naluSize < 8) {
                    return -1;
                }
                recordCtx->m_vTrackId = MP4AddH264VideoTrack
                (recordCtx->m_mp4FHandle,
                 recordCtx->m_vTimeScale,
                 recordCtx->m_vTimeScale / recordCtx->m_vFrateR,
                 recordCtx->m_vWidth,     // width
                 recordCtx->m_vHeight,    // height
                 _naluData[5], // sps[1] AVCProfileIndication
                 _naluData[6], // sps[2] profile_compat
                 _naluData[7], // sps[3] AVCLevelIndication
                 3);           // 4 bytes length before each NAL unit
                if (recordCtx->m_vTrackId == MP4_INVALID_TRACK_ID) {
                    return -1;
                }
                /* Value kept from the original code, whose comment called it
                   "Simple Profile @ Level 3".
                   NOTE(review): confirm the meaning of 0x7F against the mp4v2 docs. */
                MP4SetVideoProfileLevel(recordCtx->m_mp4FHandle, 0x7F);
            }
            MP4AddH264SequenceParameterSet(recordCtx->m_mp4FHandle,recordCtx->m_vTrackId,_naluData+4,_naluSize-4);
            break;

        case _NALU_PPS_:
            MP4AddH264PictureParameterSet(recordCtx->m_mp4FHandle,recordCtx->m_vTrackId,_naluData+4,_naluSize-4);
            break;

        case _NALU_I_:
        {
            /* Copy so the caller's buffer stays intact. The sample is the
               4-byte length followed by the NAL payload, i.e. exactly
               _naluSize bytes; the start code is not part of the payload. */
            uint8_t * sample = malloc((size_t)_naluSize);
            if (sample == NULL) {
                return -1;
            }
            mp4recPutBe32(sample, payloadLen);
            memcpy(sample + 4, _naluData + 4, payloadLen);

            const int written = MP4WriteSample(recordCtx->m_mp4FHandle, recordCtx->m_vTrackId,
                                               sample, (uint32_t)_naluSize,
                                               MP4_INVALID_DURATION, 0, 1) ? 1 : 0;
            free(sample);   /* released on the failure path as well */
            if (!written) {
                return -1;
            }
            break;
        }

        case _NALU_P_:
        {
            /* Replace the start code in place with the payload length. */
            mp4recPutBe32(_naluData, payloadLen);

            /* A non-IDR slice is not a sync sample, hence the trailing 0. */
            if (!MP4WriteSample(recordCtx->m_mp4FHandle, recordCtx->m_vTrackId,
                                _naluData, (uint32_t)_naluSize,
                                MP4_INVALID_DURATION, 0, 0)) {
                return -1;
            }
            break;
        }

        default:
            /* Missing start code or an unhandled NAL type: silently ignored. */
            break;
    }
    return 0;
}


/**
 * Write one audio sample to the audio track.
 *
 * Audio is only accepted once the video track exists. The audio track itself
 * must also be valid; a write to an invalid track used to be attempted and its
 * failure was reported as success.
 *
 * @param data  audio sample bytes; must not be NULL
 * @param len   number of bytes in data; must be positive
 * @return 0 when the sample was written, -1 when there is no session, the
 *         input is invalid, a required track is missing, or mp4v2 reports a
 *         write failure.
 */
int mp4AEncode(uint8_t * data ,int len){
    if (recordCtx == NULL || data == NULL || len <= 0) {
        return -1;
    }
    if (recordCtx->m_vTrackId == MP4_INVALID_TRACK_ID) {
        return -1;
    }
    if (recordCtx->m_aTrackId == MP4_INVALID_TRACK_ID) {
        return -1;
    }
    if (!MP4WriteSample(recordCtx->m_mp4FHandle, recordCtx->m_aTrackId, data, (uint32_t)len, MP4_INVALID_DURATION, 0, 1)) {
        return -1;
    }
    /* Counter advanced once per written sample; currently only read by
       commented-out code elsewhere in this file. */
    recordCtx->m_vFrameDur += 1024;
    return 0;
}

void closeMp4Encoder(){
    if(recordCtx){
        if (recordCtx->m_mp4FHandle != MP4_INVALID_FILE_HANDLE) {
            MP4Close(recordCtx->m_mp4FHandle,0);
            recordCtx->m_mp4FHandle = NULL;
        }
        
        free(recordCtx);
        recordCtx = NULL;
    }
    
    printf("ok  : closeMp4Encoder  \n");

}