Preface

This article walks through building a video player with FFmpeg, using SDL as the presentation API, targeting macOS.

Player Structure

(Figure: player architecture)

As the figure shows, playing a video file involves two broad stages: demuxing, then splitting into video and audio paths for decoding and rendering.

Demuxing extracts the stream's basic information, such as the frame dimensions and codec format, in preparation for creating the render window and the decoders.

A dedicated read thread, Thread_Circle_For_Frame, then pulls packets from the file, classifies them as video or audio, and pushes each into its own packet queue.

On the video side, packets are decoded into frames and converted into a displayable picture format, which goes into a further queue; on the audio side, decoded samples are resampled and later fed to the audio device.

The two paths must stay in sync, which is done by comparing their respective timestamps.
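
The packet queues that connect these threads are not defined in the excerpts below, but they are simple mutex-protected linked lists. A minimal sketch, assuming SDL's mutex and condition variable primitives (the field names are illustrative):

typedef struct PacketQueue {
  AVPacketList *first_pkt, *last_pkt; // singly linked list of queued packets
  int nb_packets;                     // number of packets in the queue
  int size;                           // total payload size in bytes
  SDL_mutex *mutex;                   // guards every field above
  SDL_cond *cond;                     // signaled whenever a packet is enqueued
} PacketQueue;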

Code Implementation

1. Demuxing

Once the source file is passed in, it first needs to be demuxed.

...
/******************** Demuxing stage ********************/
// Register all formats and codecs (required before FFmpeg 4.0)
av_register_all();
// Open the file and obtain the format (demuxing) context
avformat_open_input(&pFormatCtx, is->filename, NULL, NULL);
// Probe the stream information
avformat_find_stream_info(pFormatCtx, NULL);
// Find the indices of the video and audio streams
for(i=0; i<pFormatCtx->nb_streams; i++) {
    if(pFormatCtx->streams[i]->codecpar->codec_type==AVMEDIA_TYPE_VIDEO &&
       video_index < 0) {
      video_index=i;
    }
    if(pFormatCtx->streams[i]->codecpar->codec_type==AVMEDIA_TYPE_AUDIO &&
       audio_index < 0) {
      audio_index=i;
    }
}
...

Next, a decoder has to be initialized for the audio stream and for the video stream.

/******************** Decoder initialization stage ********************/
// Allocate a decoder context; the audio and video streams each need their own
codecCtx = avcodec_alloc_context3(NULL);
avcodec_parameters_to_context(codecCtx, pFormatCtx->streams[stream_index]->codecpar);
...
// Find and open the decoder
codec = avcodec_find_decoder(codecCtx->codec_id);
avcodec_open2(codecCtx, codec, NULL);
/******************** Video decoding preparation ********************/
...
// Create the scaling context used to convert decoded frames to YUV420P for display
is->video_sws_ctx = sws_getContext(is->video_ctx->width,
               is->video_ctx->height,
               is->video_ctx->pix_fmt,
               is->video_ctx->width,
               is->video_ctx->height,
               AV_PIX_FMT_YUV420P,
               SWS_BILINEAR, NULL, NULL, NULL);
// Initialize the video output: window, renderer, and a streaming YUV texture
win = SDL_CreateWindow("Media Player",
                       SDL_WINDOWPOS_UNDEFINED,
                       SDL_WINDOWPOS_UNDEFINED,
                       is->video_ctx->width,
                       is->video_ctx->height,
                       SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE);
renderer = SDL_CreateRenderer(win, -1, 0);
texture = SDL_CreateTexture(renderer,
                           SDL_PIXELFORMAT_IYUV,
                           SDL_TEXTUREACCESS_STREAMING,
                           is->video_ctx->width,
                           is->video_ctx->height);
...
/******************** Audio decoding preparation ********************/
// Configure the audio output device
wanted_spec.freq = codecCtx->sample_rate;      // sample rate
wanted_spec.format = AUDIO_S16SYS;             // sample format
wanted_spec.channels = 2;                      // channel count
wanted_spec.silence = 0;                       // silence value; samples are signed, so 0
wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE;   // samples per callback
wanted_spec.callback = audio_callback;         // callback SDL invokes to pull audio data
wanted_spec.userdata = is;                     // opaque pointer handed back to the callback
SDL_OpenAudio(&wanted_spec, &spec);
// Configure the audio resampling context
struct SwrContext *audio_convert_ctx;
audio_convert_ctx = swr_alloc();
swr_alloc_set_opts(audio_convert_ctx,          // resampling context
                    out_channel_layout,        // output channel layout
                    AV_SAMPLE_FMT_S16,         // output sample format
                    out_sample_rate,           // output sample rate
                    in_channel_layout,         // input channel layout
                    is->audio_ctx->sample_fmt, // input sample format
                    is->audio_ctx->sample_rate,// input sample rate
                    0,
                    NULL);
swr_init(audio_convert_ctx);                   // the context must be initialized before use
is->audio_swr_ctx = audio_convert_ctx;         // stored for audio_decode_frame below
SDL_PauseAudio(0);

With initialization done, a thread is started that loops reading packets from the file and, according to stream type, pushes each one into either the video or the audio packet queue.

for(;;) {
    ...
    // Read the next packet from the file
    av_read_frame(is->pFormatCtx, packet);
    ...
    // Classify by stream index
    if(packet->stream_index == is->videoStream) {
      // video packet: enqueue
      packet_queue_put(&is->videoq, packet);
    } else if(packet->stream_index == is->audioStream) {
      // audio packet: enqueue
      packet_queue_put(&is->audioq, packet);
    } else {
      av_free_packet(packet);
    }
}
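
packet_queue_put (and its counterpart packet_queue_get) is not shown in the original. A minimal sketch of the put side, assuming the PacketQueue layout given earlier:

int packet_queue_put(PacketQueue *q, AVPacket *pkt) {
  AVPacketList *pkt1 = av_malloc(sizeof(AVPacketList));
  if(!pkt1)
    return -1;
  pkt1->pkt = *pkt;           // take over the packet's payload
  pkt1->next = NULL;
  SDL_LockMutex(q->mutex);
  if(!q->last_pkt)
    q->first_pkt = pkt1;      // queue was empty
  else
    q->last_pkt->next = pkt1;
  q->last_pkt = pkt1;
  q->nb_packets++;
  q->size += pkt1->pkt.size;
  SDL_CondSignal(q->cond);    // wake up a blocked consumer
  SDL_UnlockMutex(q->mutex);
  return 0;
}

packet_queue_get does the reverse: it locks the mutex, pops the head of the list, and when the queue is empty (with the blocking flag set) waits on the condition variable until the read thread signals it.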

2. Video Frame Decoding

The code above set up two packet queues that the read thread keeps filling, so other threads are needed to consume them.

The consumer thread for the video packet queue looks like this:

...
for(;;) {
    // Pull one packet from the video queue (blocking)
    if(packet_queue_get(&is->videoq, packet, 1) < 0) {
      break;
    }
    pts = 0;
    // Decode the AVPacket into an AVFrame
    avcodec_decode_video2(is->video_ctx, pFrame, &frameFinished, packet);
    // Read the frame's best-effort PTS
    if((pts = av_frame_get_best_effort_timestamp(pFrame)) == AV_NOPTS_VALUE) {
      pts = 0;
    }
    // Convert to seconds: av_q2d() turns the stream's time_base into a double,
    // and PTS times the time base gives a timestamp in seconds
    pts *= av_q2d(is->video_st->time_base);

    if(frameFinished) {
      // Update the video clock used for A/V synchronization
      pts = synchronize_video(is, pFrame, pts);
      // Convert the frame to a picture and enqueue it
      if(queue_picture(is, pFrame, pts) < 0) {
        break;
      }
    }
    av_free_packet(packet);
}
...
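
Note that avcodec_decode_video2 has since been deprecated; on newer FFmpeg the same step uses the send/receive API. A sketch of the equivalent decode call:

// Send the compressed packet to the decoder
ret = avcodec_send_packet(is->video_ctx, packet);
// Receive zero or more decoded frames
while(ret >= 0) {
  ret = avcodec_receive_frame(is->video_ctx, pFrame);
  if(ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
    break;
  // ... same PTS handling and queue_picture() as above
}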

queue_picture works like this:

int queue_picture(VideoState *is, AVFrame *pFrame, double pts) {
  VideoPicture *vp;
  // Wait until there is room in the picture queue
  SDL_LockMutex(is->pictq_mutex);
  while(is->pictq_size >= VIDEO_PICTURE_QUEUE_SIZE &&
    !is->quit) {
    SDL_CondWait(is->pictq_cond, is->pictq_mutex);
  }
  SDL_UnlockMutex(is->pictq_mutex);
  if(is->quit)
    return -1;
  // Take the next write slot from pictq; (re)allocate its buffer if it no longer matches the video size
  vp = &is->pictq[is->pictq_windex];
  if(!vp->bmp ||
     vp->width != is->video_ctx->width ||
     vp->height != is->video_ctx->height) {
    vp->allocated = 0;
    alloc_picture(is);
    if(is->quit) {
      return -1;
    }
  }
  // Use swscale to convert the decoded frame into YUV420P
  if(vp->bmp) {
    vp->pts = pts;
    sws_scale(is->video_sws_ctx, (uint8_t const * const *)pFrame->data,
          pFrame->linesize, 0, is->video_ctx->height,
          vp->bmp->data, vp->bmp->linesize);
    // Advance the write index and the queue size
    if(++is->pictq_windex == VIDEO_PICTURE_QUEUE_SIZE) {
      is->pictq_windex = 0;
    }
    SDL_LockMutex(is->pictq_mutex);
    is->pictq_size++;
    SDL_UnlockMutex(is->pictq_mutex);
  }
  return 0;
}
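
alloc_picture, called above, is not part of the original excerpt either. A sketch of what it has to do, assuming vp->bmp is an AVFrame that owns the YUV420P buffer (in older tutorials this was an SDL_Overlay or AVPicture instead):

void alloc_picture(void *userdata) {
  VideoState *is = (VideoState *)userdata;
  VideoPicture *vp = &is->pictq[is->pictq_windex];

  // Drop any previous buffer
  if(vp->bmp) {
    av_frame_free(&vp->bmp);
  }
  // Allocate a frame with a YUV420P buffer matching the decoder's dimensions
  vp->bmp = av_frame_alloc();
  vp->bmp->format = AV_PIX_FMT_YUV420P;
  vp->bmp->width  = is->video_ctx->width;
  vp->bmp->height = is->video_ctx->height;
  av_frame_get_buffer(vp->bmp, 32);  // 32-byte aligned planes

  vp->width  = is->video_ctx->width;
  vp->height = is->video_ctx->height;
  SDL_LockMutex(is->pictq_mutex);
  vp->allocated = 1;                 // unblock queue_picture
  SDL_CondSignal(is->pictq_cond);
  SDL_UnlockMutex(is->pictq_mutex);
}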

At this point, the video frames have been turned into a queue of YUV pictures. We then need yet another thread to consume that YUV data:

void video_display(VideoState *is) {
  SDL_Rect rect;
  VideoPicture *vp;

  vp = &is->pictq[is->pictq_rindex];
  if(vp->bmp) {
    // Upload the three YUV planes into the streaming texture
    SDL_UpdateYUVTexture(texture, NULL,
                         vp->bmp->data[0], vp->bmp->linesize[0],
                         vp->bmp->data[1], vp->bmp->linesize[1],
                         vp->bmp->data[2], vp->bmp->linesize[2]);

    // Render the texture over the full window
    rect.x = 0;
    rect.y = 0;
    rect.w = is->video_ctx->width;
    rect.h = is->video_ctx->height;
    SDL_LockMutex(text_mutex);
    SDL_RenderClear(renderer);
    SDL_RenderCopy(renderer, texture, NULL, &rect);
    SDL_RenderPresent(renderer);
    SDL_UnlockMutex(text_mutex);
  }
}

3. Audio Frame Decoding

Decoding audio frames does not require a dedicated thread: whenever SDL needs more audio data during playback, it calls the audio_callback registered at initialization. Among its parameters, userdata is the pointer we registered ourselves, stream is the buffer SDL provides for us to fill with the samples to be played, and len is the maximum number of bytes this callback may supply.

void audio_callback(void *userdata, Uint8 *stream, int len) {

  VideoState *is = (VideoState *)userdata;
  int len1, audio_size;
  double pts;

  SDL_memset(stream, 0, len);
  // Keep going until SDL's buffer for this callback is completely filled
  while(len > 0) {
    if(is->audio_buf_index >= is->audio_buf_size) {
      // Our own buffer is exhausted: decode more data
      audio_size = audio_decode_frame(is, is->audio_buf, sizeof(is->audio_buf), &pts);
      // On failure, fill with silence instead
      if(audio_size < 0) {
        is->audio_buf_size = 1024 * 2 * 2;
        memset(is->audio_buf, 0, is->audio_buf_size);
      } else {
        is->audio_buf_size = audio_size;
      }
      is->audio_buf_index = 0;
    }
    // How much decoded data remains, capped by how much SDL still needs
    len1 = is->audio_buf_size - is->audio_buf_index;
    if(len1 > len)
      len1 = len;
    // Feed SDL
    SDL_MixAudio(stream,(uint8_t *)is->audio_buf + is->audio_buf_index, len1, SDL_MIX_MAXVOLUME);
    len -= len1;
    stream += len1;
    is->audio_buf_index += len1;
  }
}
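
audio_callback pulls its data from audio_decode_frame, which drains the audio packet queue, decodes each packet, resamples the result, and keeps the audio clock up to date: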
int audio_decode_frame(VideoState *is, uint8_t *audio_buf, int buf_size, double *pts_ptr) {

  int len1, data_size = 0;
  AVPacket *pkt = &is->audio_pkt;
  double pts;
  int n;

  for(;;) {
     while(is->audio_pkt_size > 0) {
        int got_frame = 0;
        // Decode audio data from the current packet
        len1 = avcodec_decode_audio4(is->audio_ctx, &is->audio_frame, &got_frame, pkt);
        if(len1 < 0) {
            is->audio_pkt_size = 0;
            break;
        }
        data_size = 0;
        if(got_frame) {
            // Size of the decoded data in bytes
            data_size = av_samples_get_buffer_size(NULL,
                           is->audio_ctx->channels,
                           is->audio_frame.nb_samples,
                           is->audio_ctx->sample_fmt,
                           1);
            assert(data_size <= buf_size);
            // Resample into the output buffer
            swr_convert(is->audio_swr_ctx,
                        &audio_buf,
                        MAX_AUDIO_FRAME_SIZE*3/2,
                        (const uint8_t **)is->audio_frame.data,
                        is->audio_frame.nb_samples);
        }
        is->audio_pkt_data += len1;
        is->audio_pkt_size -= len1;
        if(data_size <= 0) {
            continue;
        }
        pts = is->audio_clock;
        *pts_ptr = pts;
        n = 2 * is->audio_ctx->channels;
        // Advance the audio clock to where playback will be once this data has been consumed
        is->audio_clock += (double)data_size /(double)(n * is->audio_ctx->sample_rate);
        return data_size;
    }
    if(pkt->data)
      av_free_packet(pkt);

    if(is->quit) {
      return -1;
    }
    // Pull the next packet from the audio queue
    if(packet_queue_get(&is->audioq, pkt, 1) < 0) {
      return -1;
    }
    is->audio_pkt_data = pkt->data;
    is->audio_pkt_size = pkt->size;
    if(pkt->pts != AV_NOPTS_VALUE) {
      // Reset the audio clock from the packet's PTS
      is->audio_clock = av_q2d(is->audio_st->time_base)*pkt->pts;
    }
  }
}

4. Audio/Video Synchronization

The synchronization strategy used in this article takes the audio playback time as the master clock; the video follows the audio.

double synchronize_video(VideoState *is, AVFrame *src_frame, double pts) {
  double frame_delay;
  if(pts != 0) {
    // A usable PTS: adopt it as the video clock
    is->video_clock = pts;
  } else {
    // No PTS on this frame: fall back to the current video clock
    pts = is->video_clock;
  }
  // Base frame duration, plus extra time for repeated fields (repeat_pict)
  frame_delay = av_q2d(is->video_ctx->time_base);
  frame_delay += src_frame->repeat_pict * (frame_delay * 0.5);
  // Advance the video clock past this frame
  is->video_clock += frame_delay;
  return pts;
}
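
video_refresh_timer below reads the master clock through get_audio_clock, which the original does not show. A minimal sketch, assuming the same fields used in audio_decode_frame, where the bytes-per-second figure matches the n * sample_rate term above:

double get_audio_clock(VideoState *is) {
  double pts = is->audio_clock;  // clock as of the last decoded data
  // Data already decoded but not yet handed to the audio device
  int hw_buf_size = is->audio_buf_size - is->audio_buf_index;
  int bytes_per_sec = 0;
  if(is->audio_st) {
    // 2 bytes per sample (S16 output) times the channel count
    bytes_per_sec = is->audio_ctx->sample_rate * is->audio_ctx->channels * 2;
  }
  if(bytes_per_sec) {
    // Roll the clock back by the duration of the unplayed data
    pts -= (double)hw_buf_size / bytes_per_sec;
  }
  return pts;
}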
void video_refresh_timer(void *userdata) {

  VideoState *is = (VideoState *)userdata;
  VideoPicture *vp;
  double actual_delay, delay, sync_threshold, ref_clock, diff;

  if(is->video_st) {
    if(is->pictq_size == 0) {
      // Nothing to show yet: schedule the next run of this method almost
      // immediately; repeated scheduling is what drives the refresh loop
      schedule_refresh(is, 1);
    } else {
      // Take the next picture from the queue
      vp = &is->pictq[is->pictq_rindex];

      // Nominal delay is the PTS difference from the previous frame
      delay = vp->pts - is->frame_last_pts;
      if(delay <= 0 || delay >= 1.0) {
        // Implausible delay: reuse the previous one
        delay = is->frame_last_delay;
      }
      is->frame_last_delay = delay;
      is->frame_last_pts = vp->pts;
      ref_clock = get_audio_clock(is);
      // Difference between this frame's PTS and the audio clock
      diff = vp->pts - ref_clock;
      sync_threshold = (delay > AV_SYNC_THRESHOLD) ? delay : AV_SYNC_THRESHOLD;
      if(fabs(diff) < AV_NOSYNC_THRESHOLD) {
        if(diff <= -sync_threshold) {
            delay = 0;          // video is behind: show the frame right away
        } else if(diff >= sync_threshold) {
            delay = 2 * delay;  // video is ahead: hold the frame longer
        }
      }
      // Target wall-clock time of the next refresh
      is->frame_timer += delay;
      // Actual delay needed, measured from now
      actual_delay = is->frame_timer - (av_gettime() / 1000000.0);
      if(actual_delay < 0.010) {
        actual_delay = 0.010;
      }
      // Arm a new timer; the next refresh fires after actual_delay
      schedule_refresh(is, (int)(actual_delay * 1000 + 0.5));
      // Render the picture with SDL
      video_display(is);

      if(++is->pictq_rindex == VIDEO_PICTURE_QUEUE_SIZE) {
        is->pictq_rindex = 0;
      }
      SDL_LockMutex(is->pictq_mutex);
      is->pictq_size--;
      SDL_CondSignal(is->pictq_cond);
      SDL_UnlockMutex(is->pictq_mutex);
    }
  } else {
    schedule_refresh(is, 100);
  }
}
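
schedule_refresh itself is a thin wrapper that the original does not show. A sketch using SDL's timer facility, where FF_REFRESH_EVENT is an assumed user-defined event code:

static Uint32 sdl_refresh_timer_cb(Uint32 interval, void *opaque) {
  // Post a user event; the main event loop reacts by calling video_refresh_timer()
  SDL_Event event;
  event.type = FF_REFRESH_EVENT;
  event.user.data1 = opaque;
  SDL_PushEvent(&event);
  return 0; // returning 0 makes this a one-shot timer
}

static void schedule_refresh(VideoState *is, int delay) {
  // Fire once, delay milliseconds from now
  SDL_AddTimer(delay, sdl_refresh_timer_cb, is);
}

In the main SDL event loop, the handler for FF_REFRESH_EVENT simply calls video_refresh_timer(event.user.data1), which renders one picture and arms the next timer, closing the playback loop.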