How to extract elementary video from mp4 using ffmpeg programmatically?

问题

I started learning ffmpeg a few weeks ago. At the moment I am able to transcode any video to mp4 using the H.264/AVC codec. The main scheme is something like this:

-open input
-demux
-decode
-encode
-mux

The actual code is below:

#include <iostream>

#include <math.h>

/* FFmpeg is a C library: its headers must be included with C linkage. */
extern "C" {

  /* Preprocessor directives must each sit on their own line. */
  #ifndef __STDC_CONSTANT_MACROS
  #undef main /* Prevents SDL from overriding main() */
  #define __STDC_CONSTANT_MACROS
  #endif

  /* MSVC-only: ask the linker to pull in the FFmpeg import libraries. */
  #pragma comment(lib, "avcodec.lib")
  #pragma comment(lib, "avformat.lib")
  #pragma comment(lib, "swscale.lib")
  #pragma comment(lib, "avutil.lib")

  /* Forward slashes are accepted by every compiler, including MSVC. */
  #include <libavcodec/avcodec.h>
  #include <libavformat/avformat.h>
  #include <libswscale/swscale.h>
  #include <libavutil/mem.h>
  #include <libavutil/opt.h>
  #include <libavutil/channel_layout.h>
  #include <libavutil/common.h>
  #include <libavutil/imgutils.h>
  #include <libavutil/mathematics.h>
  #include <libavutil/samplefmt.h>
}

using namespace std;

/**
 * Opens the encoder of an output stream.
 *
 * @param oc    Output format context the stream belongs to (not used here).
 * @param codec Encoder to open the stream's codec context with.
 * @param st    Stream whose codec context (st->codec) is opened.
 *
 * The function returns nothing; a failure is only reported on stdout.
 */
void open_video(AVFormatContext *oc, AVCodec *codec, AVStream *st) {
  (void) oc;

  /* Without a stream there is no codec context to open. */
  if (st == NULL) {
    cout << ("Could not open video codec") << endl;
    return;
  }

  AVCodecContext *c = st->codec;

  /* open codec */
  cout << "probably starts here" << endl;
  int ret = avcodec_open2(c, codec, NULL);
  cout << "and ends here" << endl;

  if (ret < 0) {
    cout << ("Could not open video codec") << endl;
  }
}

/**
 * Adds a new video stream to an output format context and fills in the
 * encoder settings of the stream's codec context.
 *
 * @param oc        Format context the new stream is added to.
 * @param codec     Out-parameter: receives the encoder found for codec_id
 *                  (NULL when no such encoder is available).
 * @param codec_id  Id of the codec the stream is to be encoded with.
 * @param chWidth   Value stored as the encoder's picture width.
 * @param chHeight  Value stored as the encoder's picture height.
 * @param fps       Denominator of the encoder time base (time base = 1/fps).
 * @return The new stream, or NULL when the encoder could not be found or the
 *         stream could not be allocated.
 */
AVStream *addStream(AVFormatContext *oc, AVCodec **codec, enum AVCodecID codec_id, int chWidth, int chHeight, int fps) {
  // Look the encoder up by the requested id. The id is also stored in
  // c->codec_id below, so encoder and context must agree; the previous
  // hard-coded H.264 lookup could disagree with codec_id.
  *codec = avcodec_find_encoder(codec_id);

  if (*codec == NULL) {
    cout << "ERROR CAN NOT FIND ENCODER! ERROR! ERROR! AVCODEC_FIND_ENCODER FAILED !!!1 "
            "" << endl;
    printf("Could not find encoder for ' %s ' ", avcodec_get_name(codec_id));
    return NULL;
  }

  // Create a new stream for the found encoder inside oc.
  AVStream *st = avformat_new_stream(oc, *codec);

  if (!st) {
    cout << " Cannot allocate stream " << endl;
    return NULL;
  }

  // The stream was just appended, so its id is the last index in oc.
  st->id = oc->nb_streams - 1;

  AVCodecContext *c = st->codec;

  // Encoder settings.
  c->codec_id = codec_id;
  c->bit_rate = 4000000;
  c->width = chWidth;
  c->height = chHeight;
  c->time_base.den = fps;
  c->time_base.num = 1;
  c->gop_size = 12;
  c->pix_fmt = AV_PIX_FMT_YUV420P;

  if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
    /* just for testing, we also add B frames */
    c->max_b_frames = 2;
  }

  if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
    /* Needed to avoid using macroblocks in which some coeffs overflow.
     * This does not happen with normal video, it just happens here as
     * the motion of the chroma plane does not match the luma plane. */
    c->mb_decision = 2;
  }

  /* Some formats want stream headers to be separate. */
  if (oc->oformat->flags & AVFMT_GLOBALHEADER)
    c->flags |= CODEC_FLAG_GLOBAL_HEADER;

  return st;
}

/**
 * Decodes the first video stream of an input file, rescales every decoded
 * frame to one of five preset resolutions, re-encodes it and writes the
 * resulting packets to the output.
 *
 * @param source Input file name/URL handed to avformat_open_input().
 * @param format Resolution preset: 0 = 320x180, 1 = 640x480, 2 = 960x540,
 *               3 = 1024x768, any other value = 1280x720.
 * @return 0 on success; -1 when the input cannot be opened, probed or
 *         decoded-from, or when a frame/scaler allocation fails; 1 when the
 *         output cannot be set up; the negative decoder return code when a
 *         packet fails to decode.
 *
 * NOTE(review): writing the container header and trailer is still commented
 * out, and the output is opened as "toBeStreamed.264" although the format
 * context was created for "myoutput.mp4" - both left exactly as the author
 * had them. Error returns during the setup phase do not release resources
 * that were already acquired.
 */
int changeResolution(string source, int format) {
  AVFrame *pFrame = NULL;
  AVFrame *outFrame = NULL;
  AVPacket packet;
  uint8_t *buffer = NULL;
  AVDictionary *optionsDict = NULL;
  AVFormatContext *pFormatCtx = NULL;
  AVFormatContext *outputContext = NULL;
  AVCodecContext *pCodecCtx;
  AVCodec *pCodec;
  AVCodec *videoCodec = NULL;
  AVOutputFormat *fmt;
  AVStream *video_stream = NULL;
  // Per-call scaler (was function-static, so a second call with a different
  // input size would have reused a stale context).
  struct SwsContext *img_convert_ctx_in = NULL;
  int changeWidth;
  int changeHeight;
  int frameFinished = 0;
  int numBytes;
  int fps;
  int ret = 0;     // initialised: it is read even when avio_open() is skipped
  int result = 0;  // value returned after the common cleanup

  int lock = 0;

  // Register all codecs & other important stuff. Vital!..
  av_register_all();

  // Select the desired resolution.
  switch (format) {
  case 0:
    changeWidth = 320;
    changeHeight = 180;
    break;
  case 1:
    changeWidth = 640;
    changeHeight = 480;
    break;
  case 2:
    changeWidth = 960;
    changeHeight = 540;
    break;
  case 3:
    changeWidth = 1024;
    changeHeight = 768;
    break;
  default:
    changeWidth = 1280;
    changeHeight = 720;
    break;
  }

  // Open video file
  int aaa = avformat_open_input(&pFormatCtx, source.c_str(), NULL, NULL);
  if (aaa != 0) {
    cout << " cannot open input file \n" << endl;
    cout << "aaa = " << aaa << endl;
    return -1; // Couldn't open file
  }

  // Retrieve stream information
  if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
    return -1; // Couldn't find stream information

  // Print the info about the file, then wait for the user to enter a number.
  av_dump_format(pFormatCtx, 0, 0, 0);
  cin >> lock;

  // Find the first video stream. Iterate over the streams that actually
  // exist instead of assuming there are three of them.
  int videoStream = -1;
  for (unsigned int i = 0; i < pFormatCtx->nb_streams; i++) {
    if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
      videoStream = (int) i;
      cout << " lel \n ";
      break;
    }
  }

  if (videoStream == -1)
    return -1; // Didn't find a video stream

  // Get a pointer to the codec context for the video stream
  pCodecCtx = pFormatCtx->streams[videoStream]->codec;
  fps = pCodecCtx->time_base.den;

  // Find the decoder of the input file, for the video stream
  pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
  if (pCodec == NULL) {
    fprintf(stderr, "Unsupported codec!\n");
    return -1; // Codec not found
  }

  // The codec must be opened before it can be used.
  if (avcodec_open2(pCodecCtx, pCodec, &optionsDict) < 0)
    return -1; // Could not open codec

  // pFrame receives decoded pictures, outFrame holds the rescaled picture.
  pFrame = avcodec_alloc_frame();
  outFrame = avcodec_alloc_frame();
  if (pFrame == NULL || outFrame == NULL)
    return -1;

  int video_frame_count = 0;

  // Give outFrame a pixel buffer of the target size.
  numBytes = avpicture_get_size(AV_PIX_FMT_YUV420P, changeWidth, changeHeight);
  if (numBytes < 0)
    return -1;
  buffer = (uint8_t *) av_malloc(numBytes);
  if (buffer == NULL)
    return -1;
  avpicture_fill((AVPicture *) outFrame, buffer, AV_PIX_FMT_YUV420P, changeWidth, changeHeight);

  int pp = 0;
  int frameNo = 0;

  // Allocate the output format context; the format is guessed from the name.
  avformat_alloc_output_context2(&outputContext, NULL, NULL, "myoutput.mp4");

  // Could not guess from the file extension, fall back to MPEG.
  if (!outputContext) {
    printf("Could not deduce output format from file extension: using MPEG.\n");
    avformat_alloc_output_context2(&outputContext, NULL, "mpeg", "myoutput.mp4");
  }

  // Still no output context, give up.
  if (!outputContext) {
    return 1;
  }

  fmt = outputContext->oformat;

  // If the format has a default video codec, create the video stream for it.
  if (fmt->video_codec != AV_CODEC_ID_NONE)
    video_stream = addStream(outputContext, &videoCodec, fmt->video_codec, changeWidth, changeHeight, fps);

  // Everything below dereferences video_stream, so it must exist.
  if (!video_stream) {
    cout << " Cannot allocate stream " << endl;
    return 1;
  }

  // Open the encoder of the new stream.
  open_video(outputContext, videoCodec, video_stream);

  // Creating our new output file.
  if (!(fmt->flags & AVFMT_NOFILE)) {
    ret = avio_open(&outputContext->pb, "toBeStreamed.264", AVIO_FLAG_WRITE);
    if (ret < 0) {
      cout << " cant open file " << endl;
      return 1;
    }
  }

  //Writing the header of format context.
  //ret = avformat_write_header(outputContext, NULL);

  if (ret >= 0) {
    cout << "writing header success !!!" << endl;
  }

  // Start from a well-defined empty packet so that the av_free_packet()
  // after the loop is safe even if no packet is ever read.
  av_init_packet(&packet);
  packet.data = NULL;
  packet.size = 0;

  // Start reading packets from the input file.
  while (av_read_frame(pFormatCtx, &packet) >= 0) {

    // Packets of other streams are not used, but they still must be freed.
    if (packet.stream_index != videoStream) {
      av_free_packet(&packet);
      continue;
    }

    // Decode the video packet into a frame.
    ret = avcodec_decode_video2(pCodecCtx, pFrame, &frameFinished, &packet);

    // The input packet is done with either way; freeing it here also leaves
    // it empty for reuse as the encoder's output packet below.
    av_free_packet(&packet);

    if (ret < 0) {
      printf(" Error decoding frame !!..");
      result = ret;
      break;
    }

    // The decoder may need more packets before it outputs a picture;
    // only a finished picture may be scaled and encoded.
    if (!frameFinished)
      continue;

    printf("video_frame n:%d    coded_n:%d\n", video_frame_count++, pFrame->coded_picture_number);

    // Create the scaler on the first decoded frame.
    if (img_convert_ctx_in == NULL) {
      img_convert_ctx_in = sws_getContext(pCodecCtx->width,
        pCodecCtx->height,
        pCodecCtx->pix_fmt,
        changeWidth,
        changeHeight,
        AV_PIX_FMT_YUV420P,
        SWS_BICUBIC,
        NULL,
        NULL,
        NULL);
      if (img_convert_ctx_in == NULL) {
        result = -1;
        break;
      }
    }

    // Scale the frame to the target resolution.
    sws_scale(img_convert_ctx_in,
      pFrame->data,
      pFrame->linesize,
      0,
      pCodecCtx->height,
      outFrame->data,
      outFrame->linesize);

    // Initiate the pts value.
    if (frameNo == 0)
      outFrame->pts = 0;

    // Advance the pts by one frame duration in stream time-base units.
    outFrame->pts += av_rescale_q(1, video_stream->codec->time_base, video_stream->time_base);

    // Encode the frame; pp tells whether the encoder produced a packet.
    pp = 0;
    if (avcodec_encode_video2(video_stream->codec, &packet, outFrame, &pp) < 0) {
      cout << "Encoding frames into packages, failed. " << endl;
      pp = 0;
    }

    frameNo++;

    // Write only packets the encoder actually produced.
    if (pp) {
      packet.stream_index = video_stream->index;
      av_interleaved_write_frame(outputContext, &packet);
    }
  }

  av_free_packet(&packet);
  //av_write_trailer(outputContext);

  // The file was only opened when the format needs one.
  if (!(fmt->flags & AVFMT_NOFILE))
    avio_close(outputContext->pb);

  sws_freeContext(img_convert_ctx_in);

  // Free the scaled picture and its buffer
  av_free(buffer);
  av_free(outFrame);

  // Free the decoded frame
  av_free(pFrame);

  // Close the codecs
  avcodec_close(video_stream->codec);
  avcodec_close(pCodecCtx);

  // Release the output context and its stream
  avformat_free_context(outputContext);

  // Close the video file
  avformat_close_input(&pFormatCtx);

  return result;
}

At the end of the process I get my desired file with the desired codec, container, and resolution.

My problem is that, in one part of our project, I need to get elementary video streams in a file, such as example.264. However, I cannot add a stream without creating an AVFormatContext, and I cannot create an AVFormatContext because .264 files do not have a container; as far as I know, they are just raw video.

I have tried the approach in decoding_encoding.c, which uses fwrite. However, that example was for the MPEG-2 codec, and when I tried to adapt that code to the H.264/AVC codec, I got a "floating point division by zero" error from MediaInfo; moreover, some of the properties of the video were not shown (such as FPS, play time, and quality factor). I think it has to do with the "endcode" that the example adds at the end of the code, which is for MPEG-2 ( uint8_t endcode[] = { 0, 0, 1, 0xb7 }; ).

Anyway, I would love to get a starting point for this task. I have managed to come this far by using internet resources (which are quite few and outdated for ffmpeg), but now I'm a little stuck.

来源：https://stackoverflow.com/questions/17470153/how-to-extract-elementary-video-from-mp4-using-ffmpeg-programmatically

标签

c++

ffmpeg

h.264

video-conversion