一、概述
本文将在macOS系统上使用FFmpeg进行视频的H264、H265编码。
使用FFmpeg版本为4.2。
二、编码器初始化
有两点需要注意的是:
1.设置pCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
的目的是让编码器把SPS、PPS(H265还包括VPS)放入pCodecContext->extradata
(长度为pCodecContext->extradata_size
),从而可以单独提取这些参数集,适用于直播场景。FFmpeg头文件中的注释也写得很清楚:Place global headers in extradata instead of every keyframe.
注意:下面的初始化代码中这一行是被注释掉的,需要提取extradata时请取消注释。
如果没有设置该标志,这些参数集会在AVPacket.data
中和其他数据一起返回,适用于直接写入文件。
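2.编码后可以通过pPacket.flags & AV_PKT_FLAG_KEY
判断输出的AVPacket
是否为关键帧。该标志由编码器在输出关键帧时设置;示例代码在初始化时预先设置pPacket.flags |= AV_PKT_FLAG_KEY并不是必需的,因为avcodec_receive_packet在填充数据前会先重置传入的AVPacket。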
// Encoder setup: select H.264 or HEVC, configure a 1280x720, 25 fps,
// ~1 Mbit/s context, open it, then allocate the reusable input frame and
// initialize the output packet.
int ret;
enum AVCodecID codecID = kUseH264Encode ? AV_CODEC_ID_H264 : AV_CODEC_ID_HEVC;

pCodec = avcodec_find_encoder(codecID);
if (pCodec == NULL) {
    // FFmpeg was built without the requested encoder.
    printf("Encoder not found\n");
    return;
}

pCodecContext = avcodec_alloc_context3(pCodec);
if (pCodecContext == NULL) {
    printf("Failed to allocate codec context\n");
    return;
}

pCodecContext->codec_type = AVMEDIA_TYPE_VIDEO;
pCodecContext->pix_fmt = AV_PIX_FMT_YUV420P;
pCodecContext->width = 1280;
pCodecContext->height = 720;
// time_base of 1/25 means one pts tick per frame at 25 fps.
pCodecContext->time_base.num = 1;
pCodecContext->time_base.den = 25;
pCodecContext->bit_rate = 1000 * 1000;
pCodecContext->qmin = 10;
pCodecContext->qmax = 51;
// One keyframe per 25 frames; B-frames disabled.
pCodecContext->gop_size = 25;
pCodecContext->max_b_frames = 0;
// Uncomment to have the parameter sets placed in pCodecContext->extradata
// instead of in front of every keyframe.
// pCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

// Encoder-private options; the tune spelling differs between the H.264 and
// HEVC branches, as in the original snippet.
AVDictionary *param = NULL;
if (kUseH264Encode) {
    av_dict_set(&param, "preset", "slow", 0);
    av_dict_set(&param, "tune", "zerolatency", 0);
} else {
    av_dict_set(&param, "preset", "ultrafast", 0);
    av_dict_set(&param, "tune", "zero-latency", 0);
}

ret = avcodec_open2(pCodecContext, pCodec, &param);
// On return the dictionary holds the options that were not consumed; it must
// be released whether or not the open succeeded.
av_dict_free(&param);
if (ret < 0) {
    printf("Failed to open encoder: %s\n", av_err2str(ret));
    return;
}

pFrame = av_frame_alloc();
if (pFrame == NULL) {
    printf("Failed to allocate frame\n");
    return;
}
pFrame->width = pCodecContext->width;
pFrame->height = pCodecContext->height;
pFrame->format = pCodecContext->pix_fmt;
// Allocates the plane buffers and fills in pFrame->linesize[].
ret = av_frame_get_buffer(pFrame, 0);
if (ret < 0) {
    printf("ret == %s\n", av_err2str(ret));
}

// Initialize the output packet. av_init_packet() does not touch data/size,
// so they are cleared explicitly.
av_init_packet(&pPacket);
pPacket.data = NULL;
pPacket.size = 0;
// NOTE(review): avcodec_receive_packet() resets the packet before filling it,
// so this pre-set flag likely has no effect — confirm and consider removing.
pPacket.flags |= AV_PKT_FLAG_KEY;
三、编码
在Mac OS系统或者iOS系统中,采集到的一般是CMSampleBufferRef
对象,需要先从中拿到CVPixelBufferRef
对象,再从其中提取YUV
数据。而此处的YUV
格式,是一种two-plane
模式,即Y和UV分为两个Plane,但是UV(CbCr)为交错存储,而不是分为三个plane,需要最终转换为420P格式,即YYYYUV
。
// Convert one bi-planar (Y plane + interleaved CbCr plane) pixel buffer into
// the encoder's planar YUV420P frame, submit it, and forward every packet the
// encoder produces to the delegate.
if (CVPixelBufferLockBaseAddress(pixelBuffer, 0) == kCVReturnSuccess) {
    // Plane 0 holds Y, plane 1 holds interleaved CbCr.
    const UInt8 *srcY = (const UInt8 *)CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 0);
    const UInt8 *srcUV = (const UInt8 *)CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 1);
    // Visible picture size in pixels.
    size_t width = CVPixelBufferGetWidth(pixelBuffer);
    size_t height = CVPixelBufferGetHeight(pixelBuffer);
    // Source row strides in bytes; they may be larger than the visible width.
    size_t srcStrideY = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 0);
    size_t srcStrideUV = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 1);

    // The frame buffers were sized for pFrame->width x pFrame->height, so a
    // differently sized capture buffer cannot be copied into them safely.
    if (srcY == NULL || srcUV == NULL ||
        width != (size_t)pFrame->width || height != (size_t)pFrame->height) {
        printf("Pixel buffer size does not match encoder frame size\n");
        CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);
        return;
    }

    // Write into the frame's own buffers, honoring its linesize, instead of
    // repointing pFrame->data[] at a temporary allocation.
    int ret = av_frame_make_writable(pFrame);
    if (ret < 0) {
        printf("Frame is not writable: %s\n", av_err2str(ret));
        CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);
        return;
    }

    // Copy the Y plane row by row, dropping any source row padding.
    for (size_t row = 0; row < height; row++) {
        memcpy(pFrame->data[0] + row * pFrame->linesize[0],
               srcY + row * srcStrideY,
               width);
    }

    // De-interleave CbCr into separate U and V planes.
    for (size_t row = 0; row < height / 2; row++) {
        const UInt8 *uvRow = srcUV + row * srcStrideUV;
        UInt8 *dstU = pFrame->data[1] + row * pFrame->linesize[1];
        UInt8 *dstV = pFrame->data[2] + row * pFrame->linesize[2];
        for (size_t col = 0; col < width / 2; col++) {
            dstU[col] = uvRow[2 * col];
            dstV[col] = uvRow[2 * col + 1];
        }
    }

    // All pixel data has been copied; the capture buffer is no longer needed.
    CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);

    pFrame->pts = frameCount;

    // Hand the raw frame to the encoder.
    ret = avcodec_send_frame(pCodecContext, pFrame);
    if (ret != 0) {
        printf("Failed to encode! \n");
        return;
    }
    // Advance pts once per submitted frame so timestamps keep increasing even
    // if the encoder buffers frames before emitting packets.
    frameCount++;

    // Drain every packet that is currently available.
    while (1) {
        ret = avcodec_receive_packet(pCodecContext, &pPacket);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            // Encoder needs more input, or has been fully flushed.
            break;
        } else if (ret < 0) {
            fprintf(stderr, "Error encoding video frame\n");
            break;
        }

        if (pPacket.flags & AV_PKT_FLAG_KEY) {
            videoFrame.isKeyFrame = YES;
        }

        // Deliver the encoded bytes; the delegate method is optional.
        if ([self.delegate respondsToSelector:@selector(videoEncoder:encodeData:)]) {
            NSData *data = [NSData dataWithBytes:pPacket.data length:pPacket.size];
            [self.delegate videoEncoder:self encodeData:data];
        }

        // Release the packet's payload before reusing it.
        av_packet_unref(&pPacket);
    }
}
}
四、提取SPS,PPS,VPS数据
上文说了要单独提取SPS,PPS,VPS数据,需开始设置pCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER
。
// Out-of-band parameter sets exposed by the codec context: byte count first,
// then the pointer to the bytes themselves.
int extra_size = pCodecContext->extradata_size;
uint8_t *extra_data = pCodecContext->extradata;
1.H264编码时,拿到的extra_data
如下所示:
00000001 6764001f acb300a0 0b742000 00030020 00000651 e3064d00 00000168 e9732c8b
很明显SPS,PPS
被4个字节的start code
= 00 00 00 01
分割开,NALU header
只有一个字节:
00 00 00 01 67 ---> (0x67 & 0x1f) = 7 ---> SPS
00 00 00 01 68 ---> (0x68 & 0x1f) = 8 ---> PPS
代码如下:
int pos = 0;
int pps_pos = 0,pps_length = 0;
int sps_pos = 0,sps_length = 0;
while (pos < (extra_size - 4)) {
if (extra_data[pos] == 0 &&
extra_data[pos+1] == 0 &&
extra_data[pos+2] == 0 &&
extra_data[pos+3] == 1) {
if ((extra_data[pos+4] & 0x1f) == 7) {//sps
sps_pos = pos+4;
}else if ((extra_data[pos+4] & 0x1f) == 8){//pps
pps_pos = pos+4;
}
}
pos ++;
}
sps_length = pps_pos - sps_pos - 4;
pps_length = extra_size - pps_pos;
2.H265编码时,用同样的方法拿到extra_data
,从中提取SPS,PPS,VPS。H265的NALU header
有两个字节,NALU类型位于第一个字节的中间6位,提取方法如下:
00 00 00 01 40 01 ---> (0x40 & 0x7E)>>1 = 32 ---> VPS
00 00 00 01 42 01 ---> (0x42 & 0x7E)>>1 = 33 ---> SPS
00 00 00 01 44 01 ---> (0x44 & 0x7E)>>1 = 34 ---> PPS
需要注意的是,此处还可能包含被3个字节的start code
= 00 00 01
分割开的NAL_UNIT_SEI
数据:
00 00 01 4e 01 ---> (0x4e & 0x7E)>>1 = 39 ---> SEI
五、编码结束
编码结束时,需要冲洗编码器,将编码器中缓存的数据冲洗出来,防止丢帧。方法是调用avcodec_send_frame(pCodecContext, NULL)
进入冲洗模式,然后循环调用avcodec_receive_packet
取出剩余的AVPacket,直到返回值为AVERROR_EOF
,表示冲洗完成。最后再释放内存。