Record RTSP stream with FFmpeg libavformat

耗尽温柔 提交于 2019-11-28 16:54:33

Here's how I do it. What I found was that when receiving H.264, the frame rate in the stream is not correct: it reports a time base of 1/90000. I skip initializing the new stream from the incoming stream and just copy certain parameters. The incoming r_frame_rate should be accurate if max_analyze_frames works correctly.

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavutil/mathematics.h>

/*
 * Return the current wall-clock time in whole seconds, as reported by
 * gettimeofday(). The microsecond part of the reading is discarded.
 */
time_t get_time()
{
  struct timeval now;
  time_t seconds;

  gettimeofday( &now, NULL );
  seconds = now.tv_sec;

  return seconds;
}

int main( int argc, char* argv[] )
{
  AVFormatContext *ifcx = NULL;
  AVInputFormat *ifmt;
  AVCodecContext *iccx;
  AVCodec *icodec;
  AVStream *ist;
  int i_index;
  time_t timenow, timestart;
  int got_key_frame = 0;

  AVFormatContext *ofcx;
  AVOutputFormat *ofmt;
  AVCodecContext *occx;
  AVCodec *ocodec;
  AVStream *ost;
  int o_index;

  AVPacket pkt;

  int ix;

  const char *sProg = argv[ 0 ];
  const char *sFileInput;
  const char *sFileOutput;
  int bRunTime;

  if ( argc != 4 ) {
    printf( "Usage: %s url outfile runtime\n", sProg );
    return EXIT_FAILURE;
  } 
  sFileInput = argv[ 1 ];
  sFileOutput = argv[ 2 ];
  bRunTime = atoi( argv[ 3 ] );

  // Initialize library
  av_log_set_level( AV_LOG_DEBUG );
  av_register_all();
  avcodec_register_all(); 
  avformat_network_init();

  //
  // Input
  //

  //open rtsp
  if ( avformat_open_input( &ifcx, sFileInput, NULL, NULL) != 0 ) {
    printf( "ERROR: Cannot open input file\n" );
    return EXIT_FAILURE;
  }

  if ( avformat_find_stream_info( ifcx, NULL ) < 0 ) {
    printf( "ERROR: Cannot find stream info\n" );
    avformat_close_input( &ifcx );
    return EXIT_FAILURE;
  }

  snprintf( ifcx->filename, sizeof( ifcx->filename ), "%s", sFileInput );

  //search video stream
  i_index = -1;
  for ( ix = 0; ix < ifcx->nb_streams; ix++ ) {
    iccx = ifcx->streams[ ix ]->codec;
    if ( iccx->codec_type == AVMEDIA_TYPE_VIDEO ) {
      ist = ifcx->streams[ ix ];
      i_index = ix;
      break;
    }
  }
  if ( i_index < 0 ) {
    printf( "ERROR: Cannot find input video stream\n" );
    avformat_close_input( &ifcx );
    return EXIT_FAILURE;
  }

  //
  // Output
  //

  //open output file
  ofmt = av_guess_format( NULL, sFileOutput, NULL );
  ofcx = avformat_alloc_context();
  ofcx->oformat = ofmt;
  avio_open2( &ofcx->pb, sFileOutput, AVIO_FLAG_WRITE, NULL, NULL );

  // Create output stream
  //ost = avformat_new_stream( ofcx, (AVCodec *) iccx->codec );
  ost = avformat_new_stream( ofcx, NULL );
  avcodec_copy_context( ost->codec, iccx );

  ost->sample_aspect_ratio.num = iccx->sample_aspect_ratio.num;
  ost->sample_aspect_ratio.den = iccx->sample_aspect_ratio.den;

  // Assume r_frame_rate is accurate
  ost->r_frame_rate = ist->r_frame_rate;
  ost->avg_frame_rate = ost->r_frame_rate;
  ost->time_base = av_inv_q( ost->r_frame_rate );
  ost->codec->time_base = ost->time_base;

  avformat_write_header( ofcx, NULL );

  snprintf( ofcx->filename, sizeof( ofcx->filename ), "%s", sFileOutput );

  //start reading packets from stream and write them to file

  av_dump_format( ifcx, 0, ifcx->filename, 0 );
  av_dump_format( ofcx, 0, ofcx->filename, 1 );

  timestart = timenow = get_time();

  ix = 0;
  //av_read_play(context);//play RTSP (Shouldn't need this since it defaults to playing on connect)
  av_init_packet( &pkt );
  while ( av_read_frame( ifcx, &pkt ) >= 0 && timenow - timestart <= bRunTime ) {
    if ( pkt.stream_index == i_index ) { //packet is video               
      // Make sure we start on a key frame
      if ( timestart == timenow && ! ( pkt.flags & AV_PKT_FLAG_KEY ) ) {
        timestart = timenow = get_time();
        continue;
      }
      got_key_frame = 1;

      pkt.stream_index = ost->id;

      pkt.pts = ix++;
      pkt.dts = pkt.pts;

      av_interleaved_write_frame( ofcx, &pkt );
    }
    av_free_packet( &pkt );
    av_init_packet( &pkt );

    timenow = get_time();
  }
  av_read_pause( ifcx );
  av_write_trailer( ofcx );
  avio_close( ofcx->pb );
  avformat_free_context( ofcx );

  avformat_network_deinit();

  return EXIT_SUCCESS;
}

I don't think you're supposed to just increment the PTS value like that. It might work on the rare occasions where the time base is just right, but in the general case it won't work.

You should change this:

pkt.pts = ix++;
pkt.dts = pkt.pts;

To this:

pkt.pts = av_rescale_q(pkt.pts, ifcx->streams[0]->codec->time_base, ofcx->streams[0]->time_base);
pkt.dts = av_rescale_q(pkt.dts, ifcx->streams[0]->codec->time_base, ofcx->streams[0]->time_base);

What that does is convert the packet's PTS/DTS from the units used in the input stream's codec to the units of the output stream.

Also, some streams have multiple ticks per frame, so if the video runs at double speed you might need to add this right below the lines above:

pkt.pts *= ifcx->streams[0]->codec->ticks_per_frame;
pkt.dts *= ifcx->streams[0]->codec->ticks_per_frame;

In my experience with a modern H.264 encoder, I'm finding that the duration returned by ffmpeg is only a "suggestion" and that there is some "jitter" in the PTS. The only accurate way to determine frame rate or duration is to measure it yourself using the PTS values.

For an H.264 encoder running at 30fps, duration is always reported as 3000/90000, while measured duration is usually +/- 1 but periodically jumps say 3000+25 one frame and 3000-25 the next. I'm smoothing this out for recording by noticing any adjacent frames with opposite deviation and adjusting the PTS of the 2nd frame while preserving the total duration.

This gives me a stream with an occasional (calculated) duration of 3001 or 2999, reflecting clock drift.

When recording a 29.97fps stream, av_read_frame() always returns a duration of 3000, while the nominal calculated duration is 3003 (correct for 29.97) with the same jitter and drift as described above.

In my case, I just built a state machine to clean up the timing. Hoping this helps someone.

I was recently doing the same. My FPS was half of what the camera sent. The reason was the AVStream->codec->ticks_per_frame field, which was set to 2. My source was progressive; if yours is interlaced, that might be the reason for yet another factor of 2, giving a 4x difference in FPS. 90000 Hz is the default time base for a video stream sent via RTSP. The time base differs from the FPS in resolution. For instance, a frame with timestamp 30000 will be shown at 1/3 sec if the time base is 90000 Hz. The time base should be put into the AVStream structure during output, but AVFormatContext should have the real FPS value.

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!