[Part 2/2] Guide to FFmpeg and SDL or How to write a video player in less than 1000 lines


We are publishing the rest of the translation of this guide. It is somewhat outdated, but it has not lost its relevance, since it helps to understand the "kitchen" of creating video applications using the FFmpeg and SDL libraries.

And although we did our best, translation issues are inevitable in such a voluminous text. Report bugs (preferably in private messages) - together we will make it better.


Lesson 6: Synchronizing Audio ← ⇑ →


Full listing tutorial06.c
// tutorial06.c
// A pedagogical video player that really works!
//
// Code based on FFplay, Copyright (c) 2003 Fabrice Bellard, 
// and a tutorial by Martin Bohme (boehme@inb.uni-luebeckREMOVETHIS.de)
// Tested on Gentoo, CVS version 5/01/07 compiled with GCC 4.1.1
// With updates from https://github.com/chelyaev/ffmpeg-tutorial
// Updates tested on:
// LAVC 54.59.100, LAVF 54.29.104, LSWS 2.1.101, SDL 1.2.15
// on GCC 4.7.2 in Debian February 2015
// Use
//
// gcc -o tutorial06 tutorial06.c -lavformat -lavcodec -lswscale -lz -lm `sdl-config --cflags --libs`
// to build (assuming libavformat and libavcodec are correctly installed, 
// and assuming you have sdl-config. Please refer to SDL docs for your installation.)
//
// Run using
// tutorial06 myvideofile.mpg
//
// to play the video stream on your screen.

#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>

#include <SDL.h>
#include <SDL_thread.h>

#ifdef __MINGW32__
#undef main /* Prevents SDL from overriding main() */
#endif

#include <stdio.h>
#include <assert.h>
#include <math.h>

// compatibility with newer API
#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(55,28,1)
#define av_frame_alloc avcodec_alloc_frame
#define av_frame_free avcodec_free_frame
#endif

#define SDL_AUDIO_BUFFER_SIZE 1024
#define MAX_AUDIO_FRAME_SIZE 192000

#define MAX_AUDIOQ_SIZE (5 * 16 * 1024)
#define MAX_VIDEOQ_SIZE (5 * 256 * 1024)

#define AV_SYNC_THRESHOLD 0.01
#define AV_NOSYNC_THRESHOLD 10.0

#define SAMPLE_CORRECTION_PERCENT_MAX 10
#define AUDIO_DIFF_AVG_NB 20

#define FF_REFRESH_EVENT (SDL_USEREVENT)
#define FF_QUIT_EVENT (SDL_USEREVENT + 1)

#define VIDEO_PICTURE_QUEUE_SIZE 1

#define DEFAULT_AV_SYNC_TYPE AV_SYNC_VIDEO_MASTER

typedef struct PacketQueue {
  AVPacketList *first_pkt, *last_pkt;
  int nb_packets;
  int size;
  SDL_mutex *mutex;
  SDL_cond *cond;
} PacketQueue;


typedef struct VideoPicture {
  SDL_Overlay *bmp;
  int width, height; /* source height & width */
  int allocated;
  double pts;
} VideoPicture;

typedef struct VideoState {

  AVFormatContext *pFormatCtx;
  int             videoStream, audioStream;

  int             av_sync_type;
  double          external_clock; /* external clock base */
  int64_t         external_clock_time;

  double          audio_clock;
  AVStream        *audio_st;
  AVCodecContext  *audio_ctx;
  PacketQueue     audioq;
  uint8_t         audio_buf[(MAX_AUDIO_FRAME_SIZE * 3) / 2];
  unsigned int    audio_buf_size;
  unsigned int    audio_buf_index;
  AVFrame         audio_frame;
  AVPacket        audio_pkt;
  uint8_t         *audio_pkt_data;
  int             audio_pkt_size;
  int             audio_hw_buf_size;
  double          audio_diff_cum; /* used for AV difference average computation */
  double          audio_diff_avg_coef;
  double          audio_diff_threshold;
  int             audio_diff_avg_count;
  double          frame_timer;
  double          frame_last_pts;
  double          frame_last_delay;
  double          video_clock; ///<pts of last decoded frame / predicted pts of next decoded frame
  double          video_current_pts; ///<current displayed pts (different from video_clock if frame fifos are used)
  int64_t         video_current_pts_time;  ///<time (av_gettime) at which we updated video_current_pts - used to have running video pts
  AVStream        *video_st;
  AVCodecContext  *video_ctx;
  PacketQueue     videoq;
  struct SwsContext *sws_ctx;

  VideoPicture    pictq[VIDEO_PICTURE_QUEUE_SIZE];
  int             pictq_size, pictq_rindex, pictq_windex;
  SDL_mutex       *pictq_mutex;
  SDL_cond        *pictq_cond;
  
  SDL_Thread      *parse_tid;
  SDL_Thread      *video_tid;

  char            filename[1024];
  int             quit;
} VideoState;

enum {
  AV_SYNC_AUDIO_MASTER,
  AV_SYNC_VIDEO_MASTER,
  AV_SYNC_EXTERNAL_MASTER,
};

SDL_Surface     *screen;
SDL_mutex       *screen_mutex;

/* Since we only have one decoding thread, the Big Struct
   can be global in case we need it. */
VideoState *global_video_state;

void packet_queue_init(PacketQueue *q) {
  memset(q, 0, sizeof(PacketQueue));
  q->mutex = SDL_CreateMutex();
  q->cond = SDL_CreateCond();
}
int packet_queue_put(PacketQueue *q, AVPacket *pkt) {

  AVPacketList *pkt1;
  if(av_dup_packet(pkt) < 0) {
    return -1;
  }
  pkt1 = av_malloc(sizeof(AVPacketList));
  if (!pkt1)
    return -1;
  pkt1->pkt = *pkt;
  pkt1->next = NULL;
  
  SDL_LockMutex(q->mutex);

  if (!q->last_pkt)
    q->first_pkt = pkt1;
  else
    q->last_pkt->next = pkt1;
  q->last_pkt = pkt1;
  q->nb_packets++;
  q->size += pkt1->pkt.size;
  SDL_CondSignal(q->cond);
  
  SDL_UnlockMutex(q->mutex);
  return 0;
}
static int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block)
{
  AVPacketList *pkt1;
  int ret;

  SDL_LockMutex(q->mutex);
  
  for(;;) {
    
    if(global_video_state->quit) {
      ret = -1;
      break;
    }

    pkt1 = q->first_pkt;
    if (pkt1) {
      q->first_pkt = pkt1->next;
      if (!q->first_pkt)
	q->last_pkt = NULL;
      q->nb_packets--;
      q->size -= pkt1->pkt.size;
      *pkt = pkt1->pkt;
      av_free(pkt1);
      ret = 1;
      break;
    } else if (!block) {
      ret = 0;
      break;
    } else {
      SDL_CondWait(q->cond, q->mutex);
    }
  }
  SDL_UnlockMutex(q->mutex);
  return ret;
}

double get_audio_clock(VideoState *is) {
  double pts;
  int hw_buf_size, bytes_per_sec, n;
  
  pts = is->audio_clock; /* maintained in the audio thread */
  hw_buf_size = is->audio_buf_size - is->audio_buf_index;
  bytes_per_sec = 0;
  n = is->audio_ctx->channels * 2;
  if(is->audio_st) {
    bytes_per_sec = is->audio_ctx->sample_rate * n;
  }
  if(bytes_per_sec) {
    pts -= (double)hw_buf_size / bytes_per_sec;
  }
  return pts;
}
double get_video_clock(VideoState *is) {
  double delta;

  delta = (av_gettime() - is->video_current_pts_time) / 1000000.0;
  return is->video_current_pts + delta;
}
double get_external_clock(VideoState *is) {
  return av_gettime() / 1000000.0;
}

double get_master_clock(VideoState *is) {
  if(is->av_sync_type == AV_SYNC_VIDEO_MASTER) {
    return get_video_clock(is);
  } else if(is->av_sync_type == AV_SYNC_AUDIO_MASTER) {
    return get_audio_clock(is);
  } else {
    return get_external_clock(is);
  }
}


/* Add or subtract samples to get a better sync, return new
   audio buffer size */
int synchronize_audio(VideoState *is, short *samples,
		      int samples_size, double pts) {
  int n;
  double ref_clock;

  n = 2 * is->audio_ctx->channels;
  
  if(is->av_sync_type != AV_SYNC_AUDIO_MASTER) {
    double diff, avg_diff;
    int wanted_size, min_size, max_size /*, nb_samples */;
    
    ref_clock = get_master_clock(is);
    diff = get_audio_clock(is) - ref_clock;

    if(diff < AV_NOSYNC_THRESHOLD) {
      // accumulate the diffs
      is->audio_diff_cum = diff + is->audio_diff_avg_coef
	* is->audio_diff_cum;
      if(is->audio_diff_avg_count < AUDIO_DIFF_AVG_NB) {
	is->audio_diff_avg_count++;
      } else {
	avg_diff = is->audio_diff_cum * (1.0 - is->audio_diff_avg_coef);
	if(fabs(avg_diff) >= is->audio_diff_threshold) {
	  wanted_size = samples_size + ((int)(diff * is->audio_ctx->sample_rate) * n);
	  min_size = samples_size * ((100 - SAMPLE_CORRECTION_PERCENT_MAX) / 100);
	  max_size = samples_size * ((100 + SAMPLE_CORRECTION_PERCENT_MAX) / 100);
	  if(wanted_size < min_size) {
	    wanted_size = min_size;
	  } else if (wanted_size > max_size) {
	    wanted_size = max_size;
	  }
	  if(wanted_size < samples_size) {
	    /* remove samples */
	    samples_size = wanted_size;
	  } else if(wanted_size > samples_size) {
	    uint8_t *samples_end, *q;
	    int nb;

	    /* add samples by copying final sample*/
	    nb = (wanted_size - samples_size);
	    samples_end = (uint8_t *)samples + samples_size - n;
	    q = samples_end + n;
	    while(nb > 0) {
	      memcpy(q, samples_end, n);
	      q += n;
	      nb -= n;
	    }
	    samples_size = wanted_size;
	  }
	}
      }
    } else {
      /* difference is TOO big; reset diff stuff */
      is->audio_diff_avg_count = 0;
      is->audio_diff_cum = 0;
    }
  }
  return samples_size;
}

int audio_decode_frame(VideoState *is, uint8_t *audio_buf, int buf_size, double *pts_ptr) {

  int len1, data_size = 0;
  AVPacket *pkt = &is->audio_pkt;
  double pts;
  int n;

  for(;;) {
    while(is->audio_pkt_size > 0) {
      int got_frame = 0;
      len1 = avcodec_decode_audio4(is->audio_ctx, &is->audio_frame, &got_frame, pkt);
      if(len1 < 0) {
	/* if error, skip frame */
	is->audio_pkt_size = 0;
	break;
      }
      data_size = 0;
      if(got_frame) {
	data_size = av_samples_get_buffer_size(NULL, 
					       is->audio_ctx->channels,
					       is->audio_frame.nb_samples,
					       is->audio_ctx->sample_fmt,
					       1);
	assert(data_size <= buf_size);
	memcpy(audio_buf, is->audio_frame.data[0], data_size);
      }
      is->audio_pkt_data += len1;
      is->audio_pkt_size -= len1;
      if(data_size <= 0) {
	/* No data yet, get more frames */
	continue;
      }
      pts = is->audio_clock;
      *pts_ptr = pts;
      n = 2 * is->audio_ctx->channels;
      is->audio_clock += (double)data_size /
	(double)(n * is->audio_ctx->sample_rate);
      /* We have data, return it and come back for more later */
      return data_size;
    }
    if(pkt->data)
      av_free_packet(pkt);

    if(is->quit) {
      return -1;
    }
    /* next packet */
    if(packet_queue_get(&is->audioq, pkt, 1) < 0) {
      return -1;
    }
    is->audio_pkt_data = pkt->data;
    is->audio_pkt_size = pkt->size;
    /* if update, update the audio clock w/pts */
    if(pkt->pts != AV_NOPTS_VALUE) {
      is->audio_clock = av_q2d(is->audio_st->time_base)*pkt->pts;
    }
  }
}

void audio_callback(void *userdata, Uint8 *stream, int len) {

  VideoState *is = (VideoState *)userdata;
  int len1, audio_size;
  double pts;

  while(len > 0) {
    if(is->audio_buf_index >= is->audio_buf_size) {
      /* We have already sent all our data; get more */
      audio_size = audio_decode_frame(is, is->audio_buf, sizeof(is->audio_buf), &pts);
      if(audio_size < 0) {
	/* If error, output silence */
	is->audio_buf_size = 1024;
	memset(is->audio_buf, 0, is->audio_buf_size);
      } else {
	audio_size = synchronize_audio(is, (int16_t *)is->audio_buf,
				       audio_size, pts);
	is->audio_buf_size = audio_size;
      }
      is->audio_buf_index = 0;
    }
    len1 = is->audio_buf_size - is->audio_buf_index;
    if(len1 > len)
      len1 = len;
    memcpy(stream, (uint8_t *)is->audio_buf + is->audio_buf_index, len1);
    len -= len1;
    stream += len1;
    is->audio_buf_index += len1;
  }
}

static Uint32 sdl_refresh_timer_cb(Uint32 interval, void *opaque) {
  SDL_Event event;
  event.type = FF_REFRESH_EVENT;
  event.user.data1 = opaque;
  SDL_PushEvent(&event);
  return 0; /* 0 means stop timer */
}

/* schedule a video refresh in 'delay' ms */
static void schedule_refresh(VideoState *is, int delay) {
  SDL_AddTimer(delay, sdl_refresh_timer_cb, is);
}

void video_display(VideoState *is) {

  SDL_Rect rect;
  VideoPicture *vp;
  float aspect_ratio;
  int w, h, x, y;
  int i;

  vp = &is->pictq[is->pictq_rindex];
  if(vp->bmp) {
    if(is->video_ctx->sample_aspect_ratio.num == 0) {
      aspect_ratio = 0;
    } else {
      aspect_ratio = av_q2d(is->video_ctx->sample_aspect_ratio) *
	is->video_ctx->width / is->video_ctx->height;
    }
    if(aspect_ratio <= 0.0) {
      aspect_ratio = (float)is->video_ctx->width /
	(float)is->video_ctx->height;
    }
    h = screen->h;
    w = ((int)rint(h * aspect_ratio)) & -3;
    if(w > screen->w) {
      w = screen->w;
      h = ((int)rint(w / aspect_ratio)) & -3;
    }
    x = (screen->w - w) / 2;
    y = (screen->h - h) / 2;
    
    rect.x = x;
    rect.y = y;
    rect.w = w;
    rect.h = h;
    SDL_LockMutex(screen_mutex);
    SDL_DisplayYUVOverlay(vp->bmp, &rect);
    SDL_UnlockMutex(screen_mutex);
  }
}

void video_refresh_timer(void *userdata) {

  VideoState *is = (VideoState *)userdata;
  VideoPicture *vp;
  double actual_delay, delay, sync_threshold, ref_clock, diff;
  
  if(is->video_st) {
    if(is->pictq_size == 0) {
      schedule_refresh(is, 1);
    } else {
      vp = &is->pictq[is->pictq_rindex];
      
      is->video_current_pts = vp->pts;
      is->video_current_pts_time = av_gettime();
      delay = vp->pts - is->frame_last_pts; /* the pts from last time */
      if(delay <= 0 || delay >= 1.0) {
	/* if incorrect delay, use previous one */
	delay = is->frame_last_delay;
      }
      /* save for next time */
      is->frame_last_delay = delay;
      is->frame_last_pts = vp->pts;



      /* update delay to sync to audio if not master source */
      if(is->av_sync_type != AV_SYNC_VIDEO_MASTER) {
	ref_clock = get_master_clock(is);
	diff = vp->pts - ref_clock;
	
	/* Skip or repeat the frame. Take delay into account
	   FFPlay still doesn't "know if this is the best guess." */
	sync_threshold = (delay > AV_SYNC_THRESHOLD) ? delay : AV_SYNC_THRESHOLD;
	if(fabs(diff) < AV_NOSYNC_THRESHOLD) {
	  if(diff <= -sync_threshold) {
	    delay = 0;
	  } else if(diff >= sync_threshold) {
	    delay = 2 * delay;
	  }
	}
      }
      is->frame_timer += delay;
      /* compute the REAL delay */
      actual_delay = is->frame_timer - (av_gettime() / 1000000.0);
      if(actual_delay < 0.010) {
	/* Really it should skip the picture instead */
	actual_delay = 0.010;
      }
      schedule_refresh(is, (int)(actual_delay * 1000 + 0.5));
      
      /* show the picture! */
      video_display(is);
      
      /* update queue for next picture! */
      if(++is->pictq_rindex == VIDEO_PICTURE_QUEUE_SIZE) {
	is->pictq_rindex = 0;
      }
      SDL_LockMutex(is->pictq_mutex);
      is->pictq_size--;
      SDL_CondSignal(is->pictq_cond);
      SDL_UnlockMutex(is->pictq_mutex);
    }
  } else {
    schedule_refresh(is, 100);
  }
}
      
void alloc_picture(void *userdata) {

  VideoState *is = (VideoState *)userdata;
  VideoPicture *vp;

  vp = &is->pictq[is->pictq_windex];
  if(vp->bmp) {
    // we already have one make another, bigger/smaller
    SDL_FreeYUVOverlay(vp->bmp);
  }
  // Allocate a place to put our YUV image on that screen
  SDL_LockMutex(screen_mutex);
  vp->bmp = SDL_CreateYUVOverlay(is->video_ctx->width,
				 is->video_ctx->height,
				 SDL_YV12_OVERLAY,
				 screen);
  SDL_UnlockMutex(screen_mutex);

  vp->width = is->video_ctx->width;
  vp->height = is->video_ctx->height;
  vp->allocated = 1;

}

int queue_picture(VideoState *is, AVFrame *pFrame, double pts) {

  VideoPicture *vp;
  int dst_pix_fmt;
  AVPicture pict;

  /* wait until we have space for a new pic */
  SDL_LockMutex(is->pictq_mutex);
  while(is->pictq_size >= VIDEO_PICTURE_QUEUE_SIZE &&
	!is->quit) {
    SDL_CondWait(is->pictq_cond, is->pictq_mutex);
  }
  SDL_UnlockMutex(is->pictq_mutex);

  if(is->quit)
    return -1;

  // windex is set to 0 initially
  vp = &is->pictq[is->pictq_windex];

  /* allocate or resize the buffer! */
  if(!vp->bmp ||
     vp->width != is->video_ctx->width ||
     vp->height != is->video_ctx->height) {
    SDL_Event event;

    vp->allocated = 0;
    alloc_picture(is);
    if(is->quit) {
      return -1;
    }
  }

  /* We have a place to put our picture on the queue */

  if(vp->bmp) {

    SDL_LockYUVOverlay(vp->bmp);
    vp->pts = pts;
    
    dst_pix_fmt = PIX_FMT_YUV420P;
    /* point pict at the queue */

    pict.data[0] = vp->bmp->pixels[0];
    pict.data[1] = vp->bmp->pixels[2];
    pict.data[2] = vp->bmp->pixels[1];
    
    pict.linesize[0] = vp->bmp->pitches[0];
    pict.linesize[1] = vp->bmp->pitches[2];
    pict.linesize[2] = vp->bmp->pitches[1];
    
    // Convert the image into YUV format that SDL uses
    sws_scale(is->sws_ctx, (uint8_t const * const *)pFrame->data,
	      pFrame->linesize, 0, is->video_ctx->height,
	      pict.data, pict.linesize);
    
    SDL_UnlockYUVOverlay(vp->bmp);
    /* now we inform our display thread that we have a pic ready */
    if(++is->pictq_windex == VIDEO_PICTURE_QUEUE_SIZE) {
      is->pictq_windex = 0;
    }
    SDL_LockMutex(is->pictq_mutex);
    is->pictq_size++;
    SDL_UnlockMutex(is->pictq_mutex);
  }
  return 0;
}

double synchronize_video(VideoState *is, AVFrame *src_frame, double pts) {

  double frame_delay;

  if(pts != 0) {
    /* if we have pts, set video clock to it */
    is->video_clock = pts;
  } else {
    /* if we aren't given a pts, set it to the clock */
    pts = is->video_clock;
  }
  /* update the video clock */
  frame_delay = av_q2d(is->video_ctx->time_base);
  /* if we are repeating a frame, adjust clock accordingly */
  frame_delay += src_frame->repeat_pict * (frame_delay * 0.5);
  is->video_clock += frame_delay;
  return pts;
}

int video_thread(void *arg) {
  VideoState *is = (VideoState *)arg;
  AVPacket pkt1, *packet = &pkt1;
  int frameFinished;
  AVFrame *pFrame;
  double pts;

  pFrame = av_frame_alloc();

  for(;;) {
    if(packet_queue_get(&is->videoq, packet, 1) < 0) {
      // means we quit getting packets
      break;
    }
    pts = 0;

    // Decode video frame
    avcodec_decode_video2(is->video_ctx, pFrame, &frameFinished, packet);

    if((pts = av_frame_get_best_effort_timestamp(pFrame)) == AV_NOPTS_VALUE) {
      pts = 0;
    }
    pts *= av_q2d(is->video_st->time_base);

    // Did we get a video frame?
    if(frameFinished) {
      pts = synchronize_video(is, pFrame, pts);
      if(queue_picture(is, pFrame, pts) < 0) {
	break;
      }
    }
    av_free_packet(packet);
  }
  av_frame_free(&pFrame);
  return 0;
}

int stream_component_open(VideoState *is, int stream_index) {

  AVFormatContext *pFormatCtx = is->pFormatCtx;
  AVCodecContext *codecCtx = NULL;
  AVCodec *codec = NULL;
  SDL_AudioSpec wanted_spec, spec;

  if(stream_index < 0 || stream_index >= pFormatCtx->nb_streams) {
    return -1;
  }

  codec = avcodec_find_decoder(pFormatCtx->streams[stream_index]->codec->codec_id);
  if(!codec) {
    fprintf(stderr, "Unsupported codec!\n");
    return -1;
  }

  codecCtx = avcodec_alloc_context3(codec);
  if(avcodec_copy_context(codecCtx, pFormatCtx->streams[stream_index]->codec) != 0) {
    fprintf(stderr, "Couldn't copy codec context");
    return -1; // Error copying codec context
  }


  if(codecCtx->codec_type == AVMEDIA_TYPE_AUDIO) {
    // Set audio settings from codec info
    wanted_spec.freq = codecCtx->sample_rate;
    wanted_spec.format = AUDIO_S16SYS;
    wanted_spec.channels = codecCtx->channels;
    wanted_spec.silence = 0;
    wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE;
    wanted_spec.callback = audio_callback;
    wanted_spec.userdata = is;
    
    if(SDL_OpenAudio(&wanted_spec, &spec) < 0) {
      fprintf(stderr, "SDL_OpenAudio: %s\n", SDL_GetError());
      return -1;
    }
    is->audio_hw_buf_size = spec.size;
  }
  if(avcodec_open2(codecCtx, codec, NULL) < 0) {
    fprintf(stderr, "Unsupported codec!\n");
    return -1;
  }

  switch(codecCtx->codec_type) {
  case AVMEDIA_TYPE_AUDIO:
    is->audioStream = stream_index;
    is->audio_st = pFormatCtx->streams[stream_index];
    is->audio_ctx = codecCtx;
    is->audio_buf_size = 0;
    is->audio_buf_index = 0;

    /* averaging filter for audio sync (coefficient as in FFplay) */
    is->audio_diff_avg_coef = exp(log(0.01) / AUDIO_DIFF_AVG_NB);
    is->audio_diff_avg_count = 0;
    /* correct audio only if the error is larger than this */
    is->audio_diff_threshold = 2.0 * SDL_AUDIO_BUFFER_SIZE / codecCtx->sample_rate;

    memset(&is->audio_pkt, 0, sizeof(is->audio_pkt));
    packet_queue_init(&is->audioq);
    SDL_PauseAudio(0);
    break;
  case AVMEDIA_TYPE_VIDEO:
    is->videoStream = stream_index;
    is->video_st = pFormatCtx->streams[stream_index];
    is->video_ctx = codecCtx;

    is->frame_timer = (double)av_gettime() / 1000000.0;
    is->frame_last_delay = 40e-3;
    is->video_current_pts_time = av_gettime();

    packet_queue_init(&is->videoq);
    is->video_tid = SDL_CreateThread(video_thread, is);
    is->sws_ctx = sws_getContext(is->video_ctx->width, is->video_ctx->height,
				 is->video_ctx->pix_fmt, is->video_ctx->width,
				 is->video_ctx->height, PIX_FMT_YUV420P,
				 SWS_BILINEAR, NULL, NULL, NULL
				 );
    break;
  default:
    break;
  }
  return 0;
}

int decode_thread(void *arg) {

  VideoState *is = (VideoState *)arg;
  AVFormatContext *pFormatCtx;
  AVPacket pkt1, *packet = &pkt1;

  int video_index = -1;
  int audio_index = -1;
  int i;

  is->videoStream=-1;
  is->audioStream=-1;

  global_video_state = is;

  // Open video file
  if(avformat_open_input(&pFormatCtx, is->filename, NULL, NULL)!=0)
    return -1; // Couldn't open file

  is->pFormatCtx = pFormatCtx;
  
  // Retrieve stream information
  if(avformat_find_stream_info(pFormatCtx, NULL)<0)
    return -1; // Couldn't find stream information
  
  // Dump information about file onto standard error
  av_dump_format(pFormatCtx, 0, is->filename, 0);
  
  // Find the first video stream

  for(i=0; i<pFormatCtx->nb_streams; i++) {
    if(pFormatCtx->streams[i]->codec->codec_type==AVMEDIA_TYPE_VIDEO &&
       video_index < 0) {
      video_index=i;
    }
    if(pFormatCtx->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO &&
       audio_index < 0) {
      audio_index=i;
    }
  }
  if(audio_index >= 0) {
    stream_component_open(is, audio_index);
  }
  if(video_index >= 0) {
    stream_component_open(is, video_index);
  }   

  if(is->videoStream < 0 || is->audioStream < 0) {
    fprintf(stderr, "%s: could not open codecs\n", is->filename);
    goto fail;
  }

  // main decode loop

  for(;;) {
    if(is->quit) {
      break;
    }
    // seek stuff goes here
    if(is->audioq.size > MAX_AUDIOQ_SIZE ||
       is->videoq.size > MAX_VIDEOQ_SIZE) {
      SDL_Delay(10);
      continue;
    }
    if(av_read_frame(is->pFormatCtx, packet) < 0) {
      if(is->pFormatCtx->pb->error == 0) {
	SDL_Delay(100); /* no error; wait for user input */
	continue;
      } else {
	break;
      }
    }
    // Is this a packet from the video stream?
    if(packet->stream_index == is->videoStream) {
      packet_queue_put(&is->videoq, packet);
    } else if(packet->stream_index == is->audioStream) {
      packet_queue_put(&is->audioq, packet);
    } else {
      av_free_packet(packet);
    }
  }
  /* all done - wait for it */
  while(!is->quit) {
    SDL_Delay(100);
  }

 fail:
  if(1){
    SDL_Event event;
    event.type = FF_QUIT_EVENT;
    event.user.data1 = is;
    SDL_PushEvent(&event);
  }
  return 0;
}

int main(int argc, char *argv[]) {

  SDL_Event       event;

  VideoState      *is;

  is = av_mallocz(sizeof(VideoState));

  if(argc < 2) {
    fprintf(stderr, "Usage: test <file>\n");
    exit(1);
  }
  // Register all formats and codecs
  av_register_all();
  
  if(SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER)) {
    fprintf(stderr, "Could not initialize SDL - %s\n", SDL_GetError());
    exit(1);
  }

  // Make a screen to put our video
#ifndef __DARWIN__
        screen = SDL_SetVideoMode(640, 480, 0, 0);
#else
        screen = SDL_SetVideoMode(640, 480, 24, 0);
#endif
  if(!screen) {
    fprintf(stderr, "SDL: could not set video mode - exiting\n");
    exit(1);
  }

  screen_mutex = SDL_CreateMutex();

  av_strlcpy(is->filename, argv[1], sizeof(is->filename));

  is->pictq_mutex = SDL_CreateMutex();
  is->pictq_cond = SDL_CreateCond();

  schedule_refresh(is, 40);

  is->av_sync_type = DEFAULT_AV_SYNC_TYPE;
  is->parse_tid = SDL_CreateThread(decode_thread, is);
  if(!is->parse_tid) {
    av_free(is);
    return -1;
  }
  for(;;) {

    SDL_WaitEvent(&event);
    switch(event.type) {
    case FF_QUIT_EVENT:
    case SDL_QUIT:
      is->quit = 1;
      SDL_Quit();
      return 0;
      break;
    case FF_REFRESH_EVENT:
      video_refresh_timer(event.user.data1);
      break;
    default:
      break;
    }
  }
  return 0;
}


Now that we have a more or less decent player that you can even watch a movie on, let's tie up some loose ends. Last time we glossed over synchronization a little, namely synchronizing audio to a video clock rather than the other way around. We are going to do this the same way as with video: make an internal video clock to track how far along the video stream is, and synchronize the audio to it. Later we will generalize even further and synchronize both audio and video to an external clock.

Video Clock Implementation


Now we want to implement a video clock similar to the audio clock we had last time: an internal value that returns the current time offset of the video currently playing. At first you might think it would be as simple as updating the timer with the PTS of the last frame displayed. However, do not forget that the time between video frames can be quite long, down to the millisecond level. The solution is to track another value: the time at which we set the video clock to the PTS of the last frame. The current value of the video clock is then PTS_of_last_frame + (current_time - time_elapsed_since_PTS_value_was_set). This solution is very similar to what we did with get_audio_clock.

So, in our Big Struct we are going to put double video_current_pts and int64_t video_current_pts_time. The clock is updated in the video_refresh_timer function:

void video_refresh_timer(void *userdata) {

  /* ... */

  if(is->video_st) {
    if(is->pictq_size == 0) {
      schedule_refresh(is, 1);
    } else {
      vp = &is->pictq[is->pictq_rindex];

      is->video_current_pts = vp->pts;
      is->video_current_pts_time = av_gettime();

Do not forget to initialize it in stream_component_open:

is->video_current_pts_time = av_gettime();

And now all we need is some way to get the information:

double get_video_clock(VideoState *is) {
  double delta;

  delta = (av_gettime() - is->video_current_pts_time) / 1000000.0;
  return is->video_current_pts + delta;
}

Abstracting the Clock


But why force ourselves to use the video clock? We would have to change our video synchronization code so that audio and video are not trying to sync to each other. Imagine the mess if we tried to make it a command line option, as in FFplay. So let's abstract things: we will create a new wrapper function, get_master_clock, which checks the av_sync_type variable and then calls get_audio_clock, get_video_clock, or whatever other clock we want to use. We could even use the computer clock, which we call get_external_clock:

enum {
  AV_SYNC_AUDIO_MASTER,
  AV_SYNC_VIDEO_MASTER,
  AV_SYNC_EXTERNAL_MASTER,
};

#define DEFAULT_AV_SYNC_TYPE AV_SYNC_VIDEO_MASTER

double get_master_clock(VideoState *is) {
  if(is->av_sync_type == AV_SYNC_VIDEO_MASTER) {
    return get_video_clock(is);
  } else if(is->av_sync_type == AV_SYNC_AUDIO_MASTER) {
    return get_audio_clock(is);
  } else {
    return get_external_clock(is);
  }
}
main() {
...
  is->av_sync_type = DEFAULT_AV_SYNC_TYPE;
...
}

Audio sync


Now the hardest part: synchronizing the audio to the video clock. Our strategy is to measure where the audio is, compare it with the video clock, and then figure out how many samples we need to adjust: do we need to speed up by dropping samples, or slow down by adding them?

We will run the synchronize_audio function every time we process a set of audio samples, in order to shrink or expand that set properly. However, we do not want to synchronize all the time, because audio is processed much more often than video packets. So we will set a minimum number of consecutive calls to the synchronize_audio function that have to be out of sync before we bother to do anything. Of course, as last time, "out of sync" means that the audio clock and the video clock differ by more than the synchronization threshold.

So let's say we have received N sets of audio samples that were out of sync. The amount by which each of them is out of sync can also vary greatly, so we will take the average of how far out of sync each of them is. For example, the first call might show that we are out of sync by 40 ms, the next by 50 ms, and so on. But we are not going to take a simple mean, because the most recent values matter more than the earlier ones. So we will use a fractional coefficient, say c, and sum the differences like this: diff_sum = new_diff + diff_sum * c. When we are ready to find the average difference, we simply compute avg_diff = diff_sum * (1 - c).

What the heck is going on here? This equation looks like some kind of magic. Well, it is basically a weighted mean using a geometric series as weights. I do not know if there is a name for this (I even checked Wikipedia!), but for more information, here is an explanation (or here: weightedmean.txt).
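
To see why this gives a weighted mean, unroll the recursion. After N calls the accumulator holds the differences d_1 ... d_N weighted by powers of c:

diff_sum = d_N + c * d_(N-1) + c^2 * d_(N-2) + ... + c^(N-1) * d_1

The weights form a geometric series whose sum approaches 1 + c + c^2 + ... = 1 / (1 - c), so multiplying by (1 - c) divides the sum by the total weight. The result is a weighted average in which each difference counts c times as much as the one that follows it.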

Here's what our function looks like:

/* Add or subtract samples to get a better sync, return new
   audio buffer size */
int synchronize_audio(VideoState *is, short *samples,
		      int samples_size, double pts) {
  int n;
  double ref_clock;
  
  n = 2 * is->audio_st->codec->channels;
  
  if(is->av_sync_type != AV_SYNC_AUDIO_MASTER) {
    double diff, avg_diff;
    int wanted_size, min_size, max_size, nb_samples;
    
    ref_clock = get_master_clock(is);
    diff = get_audio_clock(is) - ref_clock;

    if(diff < AV_NOSYNC_THRESHOLD) {
      // accumulate the diffs
      is->audio_diff_cum = diff + is->audio_diff_avg_coef
	* is->audio_diff_cum;
      if(is->audio_diff_avg_count < AUDIO_DIFF_AVG_NB) {
	is->audio_diff_avg_count++;
      } else {
	avg_diff = is->audio_diff_cum * (1.0 - is->audio_diff_avg_coef);

       /* Shrinking/expanding buffer code.... */

      }
    } else {
      /* difference is TOO big; reset diff stuff */
      is->audio_diff_avg_count = 0;
      is->audio_diff_cum = 0;
    }
  }
  return samples_size;
}

So we are in good shape; we know approximately how far the audio is out of sync with the video, or with whatever we are using as a clock. So let's now calculate how many samples we need to add or discard by placing this code in the "Shrinking/expanding buffer code" section:

if(fabs(avg_diff) >= is->audio_diff_threshold) {
  wanted_size = samples_size + 
  ((int)(diff * is->audio_st->codec->sample_rate) * n);
  min_size = samples_size * ((100 - SAMPLE_CORRECTION_PERCENT_MAX)
                             / 100);
  max_size = samples_size * ((100 + SAMPLE_CORRECTION_PERCENT_MAX) 
                             / 100);
  if(wanted_size < min_size) {
    wanted_size = min_size;
  } else if (wanted_size > max_size) {
    wanted_size = max_size;
  }

Remember that audio_length * (sample_rate * # of channels * 2) is the number of bytes in audio_length seconds of audio. Therefore, the number of samples we want will be the number of samples we already have, plus or minus the number of samples that corresponds to the amount of time by which the audio has drifted. We will also set a limit on how big or small the correction can be, because if we change the buffer too much, it will be too jarring for the user.
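
To put numbers on it (an illustrative case, not taken from the code): for 16-bit stereo audio at 44100 Hz we have n = 2 channels * 2 bytes = 4, so one second of audio occupies 44100 * 4 = 176400 bytes. If the measured drift diff is 0.05 seconds, the correction term (int)(diff * sample_rate) * n works out to 2205 * 4 = 8820 bytes, and wanted_size is then clamped by min_size/max_size so that a single correction never changes the buffer by more than SAMPLE_CORRECTION_PERCENT_MAX (10%).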

Correction of the number of samples


Now we need to actually correct the audio. You may have noticed that our synchronize_audio function returns a sample size, which then tells us how many bytes to send to the stream. So we just need to adjust the sample size to the desired value. This works for reducing the sample size. But if we need to increase it, we can't just grow the sample size, because there is no more data in the buffer! So we have to add some. But what exactly should we add? It would be foolish to try to extrapolate the audio, so let's just use the audio we already have, padding the buffer with the value of the last sample.

if(wanted_size < samples_size) {
  /* remove samples */
  samples_size = wanted_size;
} else if(wanted_size > samples_size) {
  uint8_t *samples_end, *q;
  int nb;

  /* add samples by copying final samples */
  nb = (wanted_size - samples_size);
  samples_end = (uint8_t *)samples + samples_size - n;
  q = samples_end + n;
  while(nb > 0) {
    memcpy(q, samples_end, n);
    q += n;
    nb -= n;
  }
  samples_size = wanted_size;
}

Now we return the sample size, and we are done with this function. All we need to do now is use it:

void audio_callback(void *userdata, Uint8 *stream, int len) {

  VideoState *is = (VideoState *)userdata;
  int len1, audio_size;
  double pts;

  while(len > 0) {
    if(is->audio_buf_index >= is->audio_buf_size) {
      /* We have already sent all our data; get more */
      audio_size = audio_decode_frame(is, is->audio_buf, sizeof(is->audio_buf), &pts);
      if(audio_size < 0) {
	/* If error, output silence */
	is->audio_buf_size = 1024;
	memset(is->audio_buf, 0, is->audio_buf_size);
      } else {
	audio_size = synchronize_audio(is, (int16_t *)is->audio_buf,
				       audio_size, pts);
	is->audio_buf_size = audio_size;

All we did was insert the synchronize_audio call. (Also, be sure to check the source code, where we initialize the variables that I did not bother to define here.)
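
For reference, those variables are set in the audio branch of stream_component_open (see the full listing above; the coefficient follows FFplay's approach):

/* averaging filter for audio sync */
is->audio_diff_avg_coef = exp(log(0.01) / AUDIO_DIFF_AVG_NB);
is->audio_diff_avg_count = 0;
/* correct audio only if the error is larger than this */
is->audio_diff_threshold = 2.0 * SDL_AUDIO_BUFFER_SIZE / codecCtx->sample_rate;

With AUDIO_DIFF_AVG_NB = 20 this gives c = 0.01^(1/20) ≈ 0.79, so a measured difference decays to 1% of its weight after 20 sample sets, and the threshold corresponds to roughly two hardware audio buffers' worth of playback time.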

And one last thing before we finish: we need to add an "if" condition to make sure we do not sync the video when it is the master clock:

if(is->av_sync_type != AV_SYNC_VIDEO_MASTER) {
  ref_clock = get_master_clock(is);
  diff = vp->pts - ref_clock;

  /* Skip or repeat the frame. Take delay into account
     FFPlay still doesn't "know if this is the best guess." */
  sync_threshold = (delay > AV_SYNC_THRESHOLD) ? delay :
                    AV_SYNC_THRESHOLD;
  if(fabs(diff) < AV_NOSYNC_THRESHOLD) {
    if(diff <= -sync_threshold) {
      delay = 0;
    } else if(diff >= sync_threshold) {
      delay = 2 * delay;
    }
  }
}

And it works! Make sure you check the source file for the initialization of any variables that I did not bother to define or initialize here. Then compile:

gcc -o tutorial06 tutorial06.c -lavutil -lavformat -lavcodec -lswscale -lz -lm \
`sdl-config --cflags --libs`

and everything should run smoothly.

In the last lesson, we will learn how to seek.






Lesson 7: Seeking ← ⇑ →


Full listing tutorial07.c
// tutorial07.c
// A pedagogical video player that really works!
//
// Code based on FFplay, Copyright (c) 2003 Fabrice Bellard, 
// and a tutorial by Martin Bohme (boehme@inb.uni-luebeckREMOVETHIS.de)
// Tested on Gentoo, CVS version 5/01/07 compiled with GCC 4.1.1
// With updates from https://github.com/chelyaev/ffmpeg-tutorial
// Updates tested on:
// LAVC 54.59.100, LAVF 54.29.104, LSWS 2.1.101, SDL 1.2.15
// on GCC 4.7.2 in Debian February 2015
// Use
//
// gcc -o tutorial07 tutorial07.c -lavformat -lavcodec -lswscale -lz -lm `sdl-config --cflags --libs`
// to build (assuming libavformat and libavcodec are correctly installed, 
// and assuming you have sdl-config. Please refer to SDL docs for your installation.)
//
// Run using
// tutorial07 myvideofile.mpg
//
// to play the video stream on your screen.

#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>

#include <SDL.h>
#include <SDL_thread.h>

#ifdef __MINGW32__
#undef main /* Prevents SDL from overriding main() */
#endif

#include <stdio.h>
#include <assert.h>
#include <math.h>

// compatibility with newer API
#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(55,28,1)
#define av_frame_alloc avcodec_alloc_frame
#define av_frame_free avcodec_free_frame
#endif

#define SDL_AUDIO_BUFFER_SIZE 1024
#define MAX_AUDIO_FRAME_SIZE 192000

#define MAX_AUDIOQ_SIZE (5 * 16 * 1024)
#define MAX_VIDEOQ_SIZE (5 * 256 * 1024)

#define AV_SYNC_THRESHOLD 0.01
#define AV_NOSYNC_THRESHOLD 10.0

#define SAMPLE_CORRECTION_PERCENT_MAX 10
#define AUDIO_DIFF_AVG_NB 20

#define FF_REFRESH_EVENT (SDL_USEREVENT)
#define FF_QUIT_EVENT (SDL_USEREVENT + 1)

#define VIDEO_PICTURE_QUEUE_SIZE 1

#define DEFAULT_AV_SYNC_TYPE AV_SYNC_VIDEO_MASTER

typedef struct PacketQueue {
  AVPacketList *first_pkt, *last_pkt;
  int nb_packets;
  int size;
  SDL_mutex *mutex;
  SDL_cond *cond;
} PacketQueue;


typedef struct VideoPicture {
  SDL_Overlay *bmp;
  int width, height; /* source height & width */
  int allocated;
  double pts;
} VideoPicture;

typedef struct VideoState {

  AVFormatContext *pFormatCtx;
  int             videoStream, audioStream;

  int             av_sync_type;
  double          external_clock; /* external clock base */
  int64_t         external_clock_time;
  int             seek_req;
  int             seek_flags;
  int64_t         seek_pos;

  double          audio_clock;
  AVStream        *audio_st;
  AVCodecContext  *audio_ctx;
  PacketQueue     audioq;
  uint8_t         audio_buf[(MAX_AUDIO_FRAME_SIZE * 3) / 2];
  unsigned int    audio_buf_size;
  unsigned int    audio_buf_index;
  AVFrame         audio_frame;
  AVPacket        audio_pkt;
  uint8_t         *audio_pkt_data;
  int             audio_pkt_size;
  int             audio_hw_buf_size;
  double          audio_diff_cum; /* used for AV difference average computation */
  double          audio_diff_avg_coef;
  double          audio_diff_threshold;
  int             audio_diff_avg_count;
  double          frame_timer;
  double          frame_last_pts;
  double          frame_last_delay;
  double          video_clock; ///<pts of last decoded frame / predicted pts of next decoded frame
  double          video_current_pts; ///<current displayed pts (different from video_clock if frame fifos are used)
  int64_t         video_current_pts_time;  ///<time (av_gettime) at which we updated video_current_pts - used to have running video pts
  AVStream        *video_st;
  AVCodecContext  *video_ctx;
  PacketQueue     videoq;
  struct SwsContext *sws_ctx;

  VideoPicture    pictq[VIDEO_PICTURE_QUEUE_SIZE];
  int             pictq_size, pictq_rindex, pictq_windex;
  SDL_mutex       *pictq_mutex;
  SDL_cond        *pictq_cond;
  
  SDL_Thread      *parse_tid;
  SDL_Thread      *video_tid;

  char            filename[1024];
  int             quit;
} VideoState;

enum {
  AV_SYNC_AUDIO_MASTER,
  AV_SYNC_VIDEO_MASTER,
  AV_SYNC_EXTERNAL_MASTER,
};

SDL_Surface     *screen;
SDL_mutex       *screen_mutex;

/* Since we only have one decoding thread, the Big Struct
   can be global in case we need it. */
VideoState *global_video_state;
AVPacket flush_pkt;

void packet_queue_init(PacketQueue *q) {
  memset(q, 0, sizeof(PacketQueue));
  q->mutex = SDL_CreateMutex();
  q->cond = SDL_CreateCond();
}

int packet_queue_put(PacketQueue *q, AVPacket *pkt) {

  AVPacketList *pkt1;
  if(pkt != &flush_pkt && av_dup_packet(pkt) < 0) {
    return -1;
  }
  pkt1 = av_malloc(sizeof(AVPacketList));
  if (!pkt1)
    return -1;
  pkt1->pkt = *pkt;
  pkt1->next = NULL;
  
  SDL_LockMutex(q->mutex);

  if (!q->last_pkt)
    q->first_pkt = pkt1;
  else
    q->last_pkt->next = pkt1;
  q->last_pkt = pkt1;
  q->nb_packets++;
  q->size += pkt1->pkt.size;
  SDL_CondSignal(q->cond);
  
  SDL_UnlockMutex(q->mutex);
  return 0;
}
static int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block)
{
  AVPacketList *pkt1;
  int ret;

  SDL_LockMutex(q->mutex);
  
  for(;;) {
    
    if(global_video_state->quit) {
      ret = -1;
      break;
    }

    pkt1 = q->first_pkt;
    if (pkt1) {
      q->first_pkt = pkt1->next;
      if (!q->first_pkt)
	q->last_pkt = NULL;
      q->nb_packets--;
      q->size -= pkt1->pkt.size;
      *pkt = pkt1->pkt;
      av_free(pkt1);
      ret = 1;
      break;
    } else if (!block) {
      ret = 0;
      break;
    } else {
      SDL_CondWait(q->cond, q->mutex);
    }
  }
  SDL_UnlockMutex(q->mutex);
  return ret;
}

static void packet_queue_flush(PacketQueue *q) {
  AVPacketList *pkt, *pkt1;

  SDL_LockMutex(q->mutex);
  for(pkt = q->first_pkt; pkt != NULL; pkt = pkt1) {
    pkt1 = pkt->next;
    av_free_packet(&pkt->pkt);
    av_freep(&pkt);
  }
  q->last_pkt = NULL;
  q->first_pkt = NULL;
  q->nb_packets = 0;
  q->size = 0;
  SDL_UnlockMutex(q->mutex);
}

double get_audio_clock(VideoState *is) {
  double pts;
  int hw_buf_size, bytes_per_sec, n;
  
  pts = is->audio_clock; /* maintained in the audio thread */
  hw_buf_size = is->audio_buf_size - is->audio_buf_index;
  bytes_per_sec = 0;
  n = is->audio_ctx->channels * 2;
  if(is->audio_st) {
    bytes_per_sec = is->audio_ctx->sample_rate * n;
  }
  if(bytes_per_sec) {
    pts -= (double)hw_buf_size / bytes_per_sec;
  }
  return pts;
}
double get_video_clock(VideoState *is) {
  double delta;

  delta = (av_gettime() - is->video_current_pts_time) / 1000000.0;
  return is->video_current_pts + delta;
}
double get_external_clock(VideoState *is) {
  return av_gettime() / 1000000.0;
}

double get_master_clock(VideoState *is) {
  if(is->av_sync_type == AV_SYNC_VIDEO_MASTER) {
    return get_video_clock(is);
  } else if(is->av_sync_type == AV_SYNC_AUDIO_MASTER) {
    return get_audio_clock(is);
  } else {
    return get_external_clock(is);
  }
}


/* Add or subtract samples to get a better sync, return new
   audio buffer size */
int synchronize_audio(VideoState *is, short *samples,
		      int samples_size, double pts) {
  int n;
  double ref_clock;

  n = 2 * is->audio_ctx->channels;
  
  if(is->av_sync_type != AV_SYNC_AUDIO_MASTER) {
    double diff, avg_diff;
    int wanted_size, min_size, max_size /*, nb_samples */;
    
    ref_clock = get_master_clock(is);
    diff = get_audio_clock(is) - ref_clock;

    if(diff < AV_NOSYNC_THRESHOLD) {
      // accumulate the diffs
      is->audio_diff_cum = diff + is->audio_diff_avg_coef
	* is->audio_diff_cum;
      if(is->audio_diff_avg_count < AUDIO_DIFF_AVG_NB) {
	is->audio_diff_avg_count++;
      } else {
	avg_diff = is->audio_diff_cum * (1.0 - is->audio_diff_avg_coef);
	if(fabs(avg_diff) >= is->audio_diff_threshold) {
	  wanted_size = samples_size + ((int)(diff * is->audio_ctx->sample_rate) * n);
	  min_size = samples_size * ((100 - SAMPLE_CORRECTION_PERCENT_MAX) / 100);
	  max_size = samples_size * ((100 + SAMPLE_CORRECTION_PERCENT_MAX) / 100);
	  if(wanted_size < min_size) {
	    wanted_size = min_size;
	  } else if (wanted_size > max_size) {
	    wanted_size = max_size;
	  }
	  if(wanted_size < samples_size) {
	    /* remove samples */
	    samples_size = wanted_size;
	  } else if(wanted_size > samples_size) {
	    uint8_t *samples_end, *q;
	    int nb;

	    /* add samples by copying final sample*/
	    nb = (wanted_size - samples_size);
	    samples_end = (uint8_t *)samples + samples_size - n;
	    q = samples_end + n;
	    while(nb > 0) {
	      memcpy(q, samples_end, n);
	      q += n;
	      nb -= n;
	    }
	    samples_size = wanted_size;
	  }
	}
      }
    } else {
      /* difference is TOO big; reset diff stuff */
      is->audio_diff_avg_count = 0;
      is->audio_diff_cum = 0;
    }
  }
  return samples_size;
}

int audio_decode_frame(VideoState *is, uint8_t *audio_buf, int buf_size, double *pts_ptr) {

  int len1, data_size = 0;
  AVPacket *pkt = &is->audio_pkt;
  double pts;
  int n;

  for(;;) {
    while(is->audio_pkt_size > 0) {
      int got_frame = 0;
      len1 = avcodec_decode_audio4(is->audio_ctx, &is->audio_frame, &got_frame, pkt);
      if(len1 < 0) {
	/* if error, skip frame */
	is->audio_pkt_size = 0;
	break;
      }
      data_size = 0;
      if(got_frame) {
	data_size = av_samples_get_buffer_size(NULL, 
					       is->audio_ctx->channels,
					       is->audio_frame.nb_samples,
					       is->audio_ctx->sample_fmt,
					       1);
	assert(data_size <= buf_size);
	memcpy(audio_buf, is->audio_frame.data[0], data_size);
      }
      is->audio_pkt_data += len1;
      is->audio_pkt_size -= len1;
      if(data_size <= 0) {
	/* No data yet, get more frames */
	continue;
      }
      pts = is->audio_clock;
      *pts_ptr = pts;
      n = 2 * is->audio_ctx->channels;
      is->audio_clock += (double)data_size /
	(double)(n * is->audio_ctx->sample_rate);
      /* We have data, return it and come back for more later */
      return data_size;
    }
    if(pkt->data)
      av_free_packet(pkt);

    if(is->quit) {
      return -1;
    }
    /* next packet */
    if(packet_queue_get(&is->audioq, pkt, 1) < 0) {
      return -1;
    }
    if(pkt->data == flush_pkt.data) {
      avcodec_flush_buffers(is->audio_ctx);
      continue;
    }
    is->audio_pkt_data = pkt->data;
    is->audio_pkt_size = pkt->size;
    /* if update, update the audio clock w/pts */
    if(pkt->pts != AV_NOPTS_VALUE) {
      is->audio_clock = av_q2d(is->audio_st->time_base)*pkt->pts;
    }
  }
}

void audio_callback(void *userdata, Uint8 *stream, int len) {

  VideoState *is = (VideoState *)userdata;
  int len1, audio_size;
  double pts;

  while(len > 0) {
    if(is->audio_buf_index >= is->audio_buf_size) {
      /* We have already sent all our data; get more */
      audio_size = audio_decode_frame(is, is->audio_buf, sizeof(is->audio_buf), &pts);
      if(audio_size < 0) {
	/* If error, output silence */
	is->audio_buf_size = 1024;
	memset(is->audio_buf, 0, is->audio_buf_size);
      } else {
	audio_size = synchronize_audio(is, (int16_t *)is->audio_buf,
				       audio_size, pts);
	is->audio_buf_size = audio_size;
      }
      is->audio_buf_index = 0;
    }
    len1 = is->audio_buf_size - is->audio_buf_index;
    if(len1 > len)
      len1 = len;
    memcpy(stream, (uint8_t *)is->audio_buf + is->audio_buf_index, len1);
    len -= len1;
    stream += len1;
    is->audio_buf_index += len1;
  }
}

static Uint32 sdl_refresh_timer_cb(Uint32 interval, void *opaque) {
  SDL_Event event;
  event.type = FF_REFRESH_EVENT;
  event.user.data1 = opaque;
  SDL_PushEvent(&event);
  return 0; /* 0 means stop timer */
}

/* schedule a video refresh in 'delay' ms */
static void schedule_refresh(VideoState *is, int delay) {
  SDL_AddTimer(delay, sdl_refresh_timer_cb, is);
}

void video_display(VideoState *is) {

  SDL_Rect rect;
  VideoPicture *vp;
  float aspect_ratio;
  int w, h, x, y;
  int i;

  vp = &is->pictq[is->pictq_rindex];
  if(vp->bmp) {
    if(is->video_ctx->sample_aspect_ratio.num == 0) {
      aspect_ratio = 0;
    } else {
      aspect_ratio = av_q2d(is->video_ctx->sample_aspect_ratio) *
	is->video_ctx->width / is->video_ctx->height;
    }
    if(aspect_ratio <= 0.0) {
      aspect_ratio = (float)is->video_ctx->width /
	(float)is->video_ctx->height;
    }
    h = screen->h;
    w = ((int)rint(h * aspect_ratio)) & -3;
    if(w > screen->w) {
      w = screen->w;
      h = ((int)rint(w / aspect_ratio)) & -3;
    }
    x = (screen->w - w) / 2;
    y = (screen->h - h) / 2;
    
    rect.x = x;
    rect.y = y;
    rect.w = w;
    rect.h = h;
    SDL_LockMutex(screen_mutex);
    SDL_DisplayYUVOverlay(vp->bmp, &rect);
    SDL_UnlockMutex(screen_mutex);
  }
}

void video_refresh_timer(void *userdata) {

  VideoState *is = (VideoState *)userdata;
  VideoPicture *vp;
  double actual_delay, delay, sync_threshold, ref_clock, diff;
  
  if(is->video_st) {
    if(is->pictq_size == 0) {
      schedule_refresh(is, 1);
    } else {
      vp = &is->pictq[is->pictq_rindex];
      
      is->video_current_pts = vp->pts;
      is->video_current_pts_time = av_gettime();
      delay = vp->pts - is->frame_last_pts; /* the pts from last time */
      if(delay <= 0 || delay >= 1.0) {
	/* if incorrect delay, use previous one */
	delay = is->frame_last_delay;
      }
      /* save for next time */
      is->frame_last_delay = delay;
      is->frame_last_pts = vp->pts;



      /* update delay to sync to audio if not master source */
      if(is->av_sync_type != AV_SYNC_VIDEO_MASTER) {
	ref_clock = get_master_clock(is);
	diff = vp->pts - ref_clock;
	
	/* Skip or repeat the frame. Take delay into account
	   FFPlay still doesn't "know if this is the best guess." */
	sync_threshold = (delay > AV_SYNC_THRESHOLD) ? delay : AV_SYNC_THRESHOLD;
	if(fabs(diff) < AV_NOSYNC_THRESHOLD) {
	  if(diff <= -sync_threshold) {
	    delay = 0;
	  } else if(diff >= sync_threshold) {
	    delay = 2 * delay;
	  }
	}
      }
      is->frame_timer += delay;
      /* compute the REAL delay */
      actual_delay = is->frame_timer - (av_gettime() / 1000000.0);
      if(actual_delay < 0.010) {
	/* Really it should skip the picture instead */
	actual_delay = 0.010;
      }
      schedule_refresh(is, (int)(actual_delay * 1000 + 0.5));
      
      /* show the picture! */
      video_display(is);
      
      /* update queue for next picture! */
      if(++is->pictq_rindex == VIDEO_PICTURE_QUEUE_SIZE) {
	is->pictq_rindex = 0;
      }
      SDL_LockMutex(is->pictq_mutex);
      is->pictq_size--;
      SDL_CondSignal(is->pictq_cond);
      SDL_UnlockMutex(is->pictq_mutex);
    }
  } else {
    schedule_refresh(is, 100);
  }
}
      
void alloc_picture(void *userdata) {

  VideoState *is = (VideoState *)userdata;
  VideoPicture *vp;

  vp = &is->pictq[is->pictq_windex];
  if(vp->bmp) {
    // we already have one make another, bigger/smaller
    SDL_FreeYUVOverlay(vp->bmp);
  }
  // Allocate a place to put our YUV image on that screen
  SDL_LockMutex(screen_mutex);
  vp->bmp = SDL_CreateYUVOverlay(is->video_ctx->width,
				 is->video_ctx->height,
				 SDL_YV12_OVERLAY,
				 screen);
  SDL_UnlockMutex(screen_mutex);

  vp->width = is->video_ctx->width;
  vp->height = is->video_ctx->height;
  vp->allocated = 1;

}

int queue_picture(VideoState *is, AVFrame *pFrame, double pts) {

  VideoPicture *vp;
  int dst_pix_fmt;
  AVPicture pict;

  /* wait until we have space for a new pic */
  SDL_LockMutex(is->pictq_mutex);
  while(is->pictq_size >= VIDEO_PICTURE_QUEUE_SIZE &&
	!is->quit) {
    SDL_CondWait(is->pictq_cond, is->pictq_mutex);
  }
  SDL_UnlockMutex(is->pictq_mutex);

  if(is->quit)
    return -1;

  // windex is set to 0 initially
  vp = &is->pictq[is->pictq_windex];

  /* allocate or resize the buffer! */
  if(!vp->bmp ||
     vp->width != is->video_ctx->width ||
     vp->height != is->video_ctx->height) {
    SDL_Event event;

    vp->allocated = 0;
    alloc_picture(is);
    if(is->quit) {
      return -1;
    }
  }
  /* We have a place to put our picture on the queue */

  if(vp->bmp) {

    SDL_LockYUVOverlay(vp->bmp);
    vp->pts = pts;
    
    dst_pix_fmt = PIX_FMT_YUV420P;
    /* point pict at the queue */

    pict.data[0] = vp->bmp->pixels[0];
    pict.data[1] = vp->bmp->pixels[2];
    pict.data[2] = vp->bmp->pixels[1];
    
    pict.linesize[0] = vp->bmp->pitches[0];
    pict.linesize[1] = vp->bmp->pitches[2];
    pict.linesize[2] = vp->bmp->pitches[1];
    
    // Convert the image into YUV format that SDL uses
    sws_scale(is->sws_ctx, (uint8_t const * const *)pFrame->data,
	      pFrame->linesize, 0, is->video_ctx->height,
	      pict.data, pict.linesize);
    
    SDL_UnlockYUVOverlay(vp->bmp);
    /* now we inform our display thread that we have a pic ready */
    if(++is->pictq_windex == VIDEO_PICTURE_QUEUE_SIZE) {
      is->pictq_windex = 0;
    }
    SDL_LockMutex(is->pictq_mutex);
    is->pictq_size++;
    SDL_UnlockMutex(is->pictq_mutex);
  }
  return 0;
}

double synchronize_video(VideoState *is, AVFrame *src_frame, double pts) {

  double frame_delay;

  if(pts != 0) {
    /* if we have pts, set video clock to it */
    is->video_clock = pts;
  } else {
    /* if we aren't given a pts, set it to the clock */
    pts = is->video_clock;
  }
  /* update the video clock */
  frame_delay = av_q2d(is->video_ctx->time_base);
  /* if we are repeating a frame, adjust clock accordingly */
  frame_delay += src_frame->repeat_pict * (frame_delay * 0.5);
  is->video_clock += frame_delay;
  return pts;
}

int video_thread(void *arg) {
  VideoState *is = (VideoState *)arg;
  AVPacket pkt1, *packet = &pkt1;
  int frameFinished;
  AVFrame *pFrame;
  double pts;

  pFrame = av_frame_alloc();

  for(;;) {
    if(packet_queue_get(&is->videoq, packet, 1) < 0) {
      // means we quit getting packets
      break;
    }
    pts = 0;

    // Decode video frame
    avcodec_decode_video2(is->video_ctx, pFrame, &frameFinished, packet);

    if((pts = av_frame_get_best_effort_timestamp(pFrame)) == AV_NOPTS_VALUE) {
      pts = 0;
    }
    pts *= av_q2d(is->video_st->time_base);

    // Did we get a video frame?
    if(frameFinished) {
      pts = synchronize_video(is, pFrame, pts);
      if(queue_picture(is, pFrame, pts) < 0) {
	break;
      }
    }
    av_free_packet(packet);
  }
  av_frame_free(&pFrame);
  return 0;
}

int stream_component_open(VideoState *is, int stream_index) {

  AVFormatContext *pFormatCtx = is->pFormatCtx;
  AVCodecContext *codecCtx = NULL;
  AVCodec *codec = NULL;
  SDL_AudioSpec wanted_spec, spec;

  if(stream_index < 0 || stream_index >= pFormatCtx->nb_streams) {
    return -1;
  }

  codec = avcodec_find_decoder(pFormatCtx->streams[stream_index]->codec->codec_id);
  if(!codec) {
    fprintf(stderr, "Unsupported codec!\n");
    return -1;
  }

  codecCtx = avcodec_alloc_context3(codec);
  if(avcodec_copy_context(codecCtx, pFormatCtx->streams[stream_index]->codec) != 0) {
    fprintf(stderr, "Couldn't copy codec context");
    return -1; // Error copying codec context
  }


  if(codecCtx->codec_type == AVMEDIA_TYPE_AUDIO) {
    // Set audio settings from codec info
    wanted_spec.freq = codecCtx->sample_rate;
    wanted_spec.format = AUDIO_S16SYS;
    wanted_spec.channels = codecCtx->channels;
    wanted_spec.silence = 0;
    wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE;
    wanted_spec.callback = audio_callback;
    wanted_spec.userdata = is;
    
    if(SDL_OpenAudio(&wanted_spec, &spec) < 0) {
      fprintf(stderr, "SDL_OpenAudio: %s\n", SDL_GetError());
      return -1;
    }
    is->audio_hw_buf_size = spec.size;
  }
  if(avcodec_open2(codecCtx, codec, NULL) < 0) {
    fprintf(stderr, "Unsupported codec!\n");
    return -1;
  }

  switch(codecCtx->codec_type) {
  case AVMEDIA_TYPE_AUDIO:
    is->audioStream = stream_index;
    is->audio_st = pFormatCtx->streams[stream_index];
    is->audio_ctx = codecCtx;
    is->audio_buf_size = 0;
    is->audio_buf_index = 0;

    /* averaging filter for audio sync (coefficient as in FFplay) */
    is->audio_diff_avg_coef = exp(log(0.01) / AUDIO_DIFF_AVG_NB);
    is->audio_diff_avg_count = 0;
    /* correct audio only if the error is larger than this */
    is->audio_diff_threshold = 2.0 * SDL_AUDIO_BUFFER_SIZE / codecCtx->sample_rate;

    memset(&is->audio_pkt, 0, sizeof(is->audio_pkt));
    packet_queue_init(&is->audioq);
    SDL_PauseAudio(0);
    break;
  case AVMEDIA_TYPE_VIDEO:
    is->videoStream = stream_index;
    is->video_st = pFormatCtx->streams[stream_index];
    is->video_ctx = codecCtx;

    is->frame_timer = (double)av_gettime() / 1000000.0;
    is->frame_last_delay = 40e-3;
    is->video_current_pts_time = av_gettime();

    packet_queue_init(&is->videoq);
    is->video_tid = SDL_CreateThread(video_thread, is);
    is->sws_ctx = sws_getContext(is->video_ctx->width, is->video_ctx->height,
				 is->video_ctx->pix_fmt, is->video_ctx->width,
				 is->video_ctx->height, PIX_FMT_YUV420P,
				 SWS_BILINEAR, NULL, NULL, NULL
				 );
    break;
  default:
    break;
  }
  return 0;
}

int decode_thread(void *arg) {

  VideoState *is = (VideoState *)arg;
  AVFormatContext *pFormatCtx;
  AVPacket pkt1, *packet = &pkt1;

  int video_index = -1;
  int audio_index = -1;
  int i;

  is->videoStream=-1;
  is->audioStream=-1;

  global_video_state = is;

  // Open video file
  if(avformat_open_input(&pFormatCtx, is->filename, NULL, NULL)!=0)
    return -1; // Couldn't open file

  is->pFormatCtx = pFormatCtx;
  
  // Retrieve stream information
  if(avformat_find_stream_info(pFormatCtx, NULL)<0)
    return -1; // Couldn't find stream information
  
  // Dump information about file onto standard error
  av_dump_format(pFormatCtx, 0, is->filename, 0);
  
  // Find the first video stream

  for(i=0; i<pFormatCtx->nb_streams; i++) {
    if(pFormatCtx->streams[i]->codec->codec_type==AVMEDIA_TYPE_VIDEO &&
       video_index < 0) {
      video_index=i;
    }
    if(pFormatCtx->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO &&
       audio_index < 0) {
      audio_index=i;
    }
  }
  if(audio_index >= 0) {
    stream_component_open(is, audio_index);
  }
  if(video_index >= 0) {
    stream_component_open(is, video_index);
  }   

  if(is->videoStream < 0 || is->audioStream < 0) {
    fprintf(stderr, "%s: could not open codecs\n", is->filename);
    goto fail;
  }

  // main decode loop

  for(;;) {
    if(is->quit) {
      break;
    }
    // seek stuff goes here
    if(is->seek_req) {
      int stream_index= -1;
      int64_t seek_target = is->seek_pos;

      if     (is->videoStream >= 0) stream_index = is->videoStream;
      else if(is->audioStream >= 0) stream_index = is->audioStream;

      if(stream_index>=0){
	seek_target= av_rescale_q(seek_target, AV_TIME_BASE_Q,
				  pFormatCtx->streams[stream_index]->time_base);
      }
      if(av_seek_frame(is->pFormatCtx, stream_index, 
		       seek_target, is->seek_flags) < 0) {
	fprintf(stderr, "%s: error while seeking\n",
		is->pFormatCtx->filename);
      } else {

	if(is->audioStream >= 0) {
	  packet_queue_flush(&is->audioq);
	  packet_queue_put(&is->audioq, &flush_pkt);
	}
	if(is->videoStream >= 0) {
	  packet_queue_flush(&is->videoq);
	  packet_queue_put(&is->videoq, &flush_pkt);
	}
      }
      is->seek_req = 0;
    }

    if(is->audioq.size > MAX_AUDIOQ_SIZE ||
       is->videoq.size > MAX_VIDEOQ_SIZE) {
      SDL_Delay(10);
      continue;
    }
    if(av_read_frame(is->pFormatCtx, packet) < 0) {
      if(is->pFormatCtx->pb->error == 0) {
	SDL_Delay(100); /* no error; wait for user input */
	continue;
      } else {
	break;
      }
    }
    // Is this a packet from the video stream?
    if(packet->stream_index == is->videoStream) {
      packet_queue_put(&is->videoq, packet);
    } else if(packet->stream_index == is->audioStream) {
      packet_queue_put(&is->audioq, packet);
    } else {
      av_free_packet(packet);
    }
  }
  /* all done - wait for it */
  while(!is->quit) {
    SDL_Delay(100);
  }

 fail:
  if(1){
    SDL_Event event;
    event.type = FF_QUIT_EVENT;
    event.user.data1 = is;
    SDL_PushEvent(&event);
  }
  return 0;
}

void stream_seek(VideoState *is, int64_t pos, int rel) {

  if(!is->seek_req) {
    is->seek_pos = pos;
    is->seek_flags = rel < 0 ? AVSEEK_FLAG_BACKWARD : 0;
    is->seek_req = 1;
  }
}

int main(int argc, char *argv[]) {

  SDL_Event       event;

  VideoState      *is;

  is = av_mallocz(sizeof(VideoState));

  if(argc < 2) {
    fprintf(stderr, "Usage: test <file>\n");
    exit(1);
  }
  // Register all formats and codecs
  av_register_all();
  
  if(SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER)) {
    fprintf(stderr, "Could not initialize SDL - %s\n", SDL_GetError());
    exit(1);
  }

  // Make a screen to put our video
#ifndef __DARWIN__
        screen = SDL_SetVideoMode(640, 480, 0, 0);
#else
        screen = SDL_SetVideoMode(640, 480, 24, 0);
#endif
  if(!screen) {
    fprintf(stderr, "SDL: could not set video mode - exiting\n");
    exit(1);
  }

  screen_mutex = SDL_CreateMutex();

  av_strlcpy(is->filename, argv[1], sizeof(is->filename));

  is->pictq_mutex = SDL_CreateMutex();
  is->pictq_cond = SDL_CreateCond();

  schedule_refresh(is, 40);

  is->av_sync_type = DEFAULT_AV_SYNC_TYPE;
  is->parse_tid = SDL_CreateThread(decode_thread, is);
  if(!is->parse_tid) {
    av_free(is);
    return -1;
  }

  av_init_packet(&flush_pkt);
  flush_pkt.data = "FLUSH";

  for(;;) {
    double incr, pos;
    SDL_WaitEvent(&event);
    switch(event.type) {
    case SDL_KEYDOWN:
      switch(event.key.keysym.sym) {
      case SDLK_LEFT:
	incr = -10.0;
	goto do_seek;
      case SDLK_RIGHT:
	incr = 10.0;
	goto do_seek;
      case SDLK_UP:
	incr = 60.0;
	goto do_seek;
      case SDLK_DOWN:
	incr = -60.0;
	goto do_seek;
      do_seek:
	if(global_video_state) {
	  pos = get_master_clock(global_video_state);
	  pos += incr;
	  stream_seek(global_video_state, (int64_t)(pos * AV_TIME_BASE), incr);
	}
	break;
      default:
	break;
      }
      break;
    case FF_QUIT_EVENT:
    case SDL_QUIT:
      is->quit = 1;
      /*
       * If the video has finished playing, then both the picture and
       * audio queues are waiting for more data.  Make them stop
       * waiting and terminate normally.
       */
      SDL_CondSignal(is->audioq.cond);
      SDL_CondSignal(is->videoq.cond);
      SDL_Quit();
      return 0;
      break;
    case FF_REFRESH_EVENT:
      video_refresh_timer(event.user.data1);
      break;
    default:
      break;
    }
  }
  return 0;

}

Handling the seek command


Now we are going to add seeking to our player, because it is really annoying when you cannot rewind a movie. Along the way, we will see how easy the av_seek_frame function is to use.

We are going to make the "left" and "right" arrow keys skip backward and forward through the film a little, and the "up" and "down" arrows a lot, where "a little" is 10 seconds and "a lot" is a full 60. So we need to set up our main loop to intercept keystrokes. The catch is that when we get a keystroke, we cannot call av_seek_frame directly: that has to happen in our main decoding loop, the decode_thread loop. So instead we add some values to the big struct that hold the new position to seek to and some seek flags:

  int             seek_req;
  int             seek_flags;
  int64_t         seek_pos;

Now we set up our main loop to catch the keystrokes:

  for(;;) {
    double incr, pos;

    SDL_WaitEvent(&event);
    switch(event.type) {
    case SDL_KEYDOWN:
      switch(event.key.keysym.sym) {
      case SDLK_LEFT:
	incr = -10.0;
	goto do_seek;
      case SDLK_RIGHT:
	incr = 10.0;
	goto do_seek;
      case SDLK_UP:
	incr = 60.0;
	goto do_seek;
      case SDLK_DOWN:
	incr = -60.0;
	goto do_seek;
      do_seek:
	if(global_video_state) {
	  pos = get_master_clock(global_video_state);
	  pos += incr;
	  stream_seek(global_video_state, 
                      (int64_t)(pos * AV_TIME_BASE), incr);
	}
	break;
      default:
	break;
      }
      break;

To catch keystrokes, we first look for an SDL_KEYDOWN event. Then we check which key was pressed using event.key.keysym.sym. Once we know which way we want to seek, we compute the new time by adding the increment to the value from our new get_master_clock function. Then we call stream_seek to set seek_pos and the other values. We convert our new time into avcodec's internal timestamp units. Recall that timestamps in streams are measured in frames rather than seconds, with the formula seconds = frames * time_base (fps). avcodec defaults to a value of 1,000,000 fps (so a position of 2 seconds will have a timestamp of 2,000,000). Why we need to convert this value, we will see later.

Here is our stream_seek function. Note that we set a flag if we are seeking backwards:

void stream_seek(VideoState *is, int64_t pos, int rel) {

  if(!is->seek_req) {
    is->seek_pos = pos;
    is->seek_flags = rel < 0 ? AVSEEK_FLAG_BACKWARD : 0;
    is->seek_req = 1;
  }
}

Now let's move over to decode_thread, where we actually perform the seek. In the source files you can see that we marked the spot with the comment "seek stuff goes here". Well, we are going to put some code there right now.

Seeking centers around the av_seek_frame function. It takes a format context, a stream, a timestamp, and a set of flags as arguments, and seeks to the timestamp you give it. The unit of the timestamp is the time_base of the stream you pass in. However, you do not have to pass a stream (indicated by passing -1 instead). If you do that, the time_base will be avcodec's internal timestamp unit, i.e. 1,000,000 fps. That is why we multiplied our position by AV_TIME_BASE when we set seek_pos.

However, with some files you can (rarely) run into problems if you pass av_seek_frame -1 for the stream, so we are going to pick the first stream in our file and pass it to av_seek_frame. Do not forget that we then have to rescale our timestamp to be in the new "coordinate system".

if(is->seek_req) {
  int stream_index= -1;
  int64_t seek_target = is->seek_pos;

  if     (is->videoStream >= 0) stream_index = is->videoStream;
  else if(is->audioStream >= 0) stream_index = is->audioStream;

  if(stream_index>=0){
    seek_target= av_rescale_q(seek_target, AV_TIME_BASE_Q,
                      pFormatCtx->streams[stream_index]->time_base);
  }
  if(av_seek_frame(is->pFormatCtx, stream_index, 
                    seek_target, is->seek_flags) < 0) {
    fprintf(stderr, "%s: error while seeking\n",
            is->pFormatCtx->filename);
  } else {
     /* handle packet queues... more later... */

av_rescale_q(a, b, c) is a function that rescales a timestamp from one base to another. It basically computes a * b / c, but this function is needed because that calculation can overflow. AV_TIME_BASE_Q is the fractional version of AV_TIME_BASE. They are quite different: AV_TIME_BASE * time_in_seconds = avcodec_timestamp, while AV_TIME_BASE_Q * avcodec_timestamp = time_in_seconds (note, though, that AV_TIME_BASE_Q is actually an AVRational object, so you have to use the special q functions in avcodec to work with it).
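For example, here is a quick sketch of what the rescaling works out to; the 1/90000 time_base is just an assumption for illustration (it is common for MPEG-TS streams):

/* Sketch: 2 seconds in AV_TIME_BASE units, rescaled into a
 * hypothetical stream time_base of 1/90000. */
int64_t seek_target = (int64_t)(2.0 * AV_TIME_BASE); /* 2,000,000 */
AVRational tb = {1, 90000};                          /* assumed time_base */
int64_t ts = av_rescale_q(seek_target, AV_TIME_BASE_Q, tb);
/* ts = 2000000 * (1/1000000) / (1/90000) = 180000 */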

Flushing our buffers


So our seeks are set up properly, but we are not finished yet. Remember that we have a queue set up to accumulate packets? Now that we have jumped to a different timestamp, we have to flush that queue or the seek in the movie will not work! Besides that, avcodec has its own internal buffers, which also need to be flushed for each stream.

To do this, we first need to write a function that clears our packet queue. Then we need some way of telling the audio and video threads to flush avcodec's internal buffers. We can do this by putting a special packet on the queue right after we flush it; when the threads detect that special packet, they simply flush their buffers.

Let's start with the flush function. It is really quite simple, so I will just show you the code:

static void packet_queue_flush(PacketQueue *q) {
  AVPacketList *pkt, *pkt1;

  SDL_LockMutex(q->mutex);
  for(pkt = q->first_pkt; pkt != NULL; pkt = pkt1) {
    pkt1 = pkt->next;
    av_free_packet(&pkt->pkt);
    av_freep(&pkt);
  }
  q->last_pkt = NULL;
  q->first_pkt = NULL;
  q->nb_packets = 0;
  q->size = 0;
  SDL_UnlockMutex(q->mutex);
}

Now that the queue is flushed, let's add our "flush packet". But first it would be nice to define what it is and create it:

AVPacket flush_pkt;

main() {
  ...
  av_init_packet(&flush_pkt);
  flush_pkt.data = "FLUSH";
  ...
}

Now we put this packet on the queue:

  } else {
    if(is->audioStream >= 0) {
      packet_queue_flush(&is->audioq);
      packet_queue_put(&is->audioq, &flush_pkt);
    }
    if(is->videoStream >= 0) {
      packet_queue_flush(&is->videoq);
      packet_queue_put(&is->videoq, &flush_pkt);
    }
  }
  is->seek_req = 0;
}

(This code snippet continues the decode_thread snippet above.) We also need to change packet_queue_put so that we do not duplicate the special flush packet:

int packet_queue_put(PacketQueue *q, AVPacket *pkt) {

  AVPacketList *pkt1;
  if(pkt != &flush_pkt && av_dup_packet(pkt) < 0) {
    return -1;
  }

And then, in the audio and video threads, we put this call to avcodec_flush_buffers right after packet_queue_get:

    if(packet_queue_get(&is->audioq, pkt, 1) < 0) {
      return -1;
    }
    if(pkt->data == flush_pkt.data) {
      avcodec_flush_buffers(is->audio_st->codec);
      continue;
    }

The code snippet above is exactly the same for the video thread, with "audio" replaced by "video" (a sketch follows below).
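For reference, a sketch of how that check looks inside the tutorial's video_thread loop (is and flush_pkt come from the surrounding code):

AVPacket pkt1, *packet = &pkt1;
for(;;) {
  if(packet_queue_get(&is->videoq, packet, 1) < 0) {
    break; // means we quit getting packets
  }
  if(packet->data == flush_pkt.data) {
    avcodec_flush_buffers(is->video_st->codec);
    continue;
  }
  /* ... decode the video frame as before ... */
}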

This is it! We did it! Compile your player:

gcc -o tutorial07 tutorial07.c -lavutil -lavformat -lavcodec -lswscale -lz -lm \
`sdl-config --cflags --libs`

and enjoy your movie player made in less than 1000 lines of C!

Although, of course, there are a lot of things that can be added or improved.






Afterword โ† โ‡‘ โ†’


So we have a working player, but of course it is not as good as it could be. We could go back into the file and add a lot of useful things:

  • Let's face it, this player sucks. The version of ffplay.c on which it is based is completely outdated, and as a result, this tutorial needs to be thoroughly revised. If you want to go to more serious projects using the FFmpeg libraries, I strongly recommend that you check the latest version of ffplay.c as the next task.
  • The error handling in our code is terrible and can be implemented much better.
  • Pausing. We cannot pause the movie, which is admittedly a useful feature. We could do it with an internal paused variable in the big struct, set when the user pauses; the audio, video, and decode threads would check it and output nothing. We would also use av_read_play for network support. It is quite simple to explain, but not obvious to figure out on your own, so consider it homework. For hints, look at ffplay.c.
  • Support for video hardware.
  • Seeking by bytes. If you calculate the seek position in bytes rather than in seconds, seeking is more accurate for video files with discontinuous timestamps, such as VOB files.
  • Frame dropping. If the video falls too far behind, we should drop the next frame rather than schedule a short refresh.
  • Network support. The player currently cannot play streaming video over the network.
  • Support for raw video, such as YUV files. Some options have to be set here, since for raw video we cannot guess things like the frame size or the time_base.
  • Fullscreen mode.
  • Various command-line options, e.g. different picture formats; ffplay.c has all the switches.


If you want to know more about FFmpeg, we have covered far from everything here. The next step is to study multimedia encoding. The best place to start is the file output_example.c, which you will find in the FFmpeg distribution. I could write another textbook on that subject, but it would hardly surpass this guide.

UPD. It has been a long time since I updated this text, but meanwhile the world does not stand still. This tutorial only needs simple API updates; very little has changed in terms of basic concepts, and most of those updates have actually simplified the code. However, even though I went through all the code here and updated it, FFplay still outperforms this toy player. Let's be honest: in these lessons we wrote a pretty lousy movie player. So if today (or in the future) you want to improve this tutorial, I recommend studying FFplay and finding out what is missing. I believe this mostly concerns the use of video hardware, but quite possibly I am overlooking other obvious things. Perhaps comparing against the current FFplay would lead to a radical rewrite of some parts; I have not looked into it yet.

But I am very proud that over the years my work has helped a great deal, even considering that people often looked for the code elsewhere. I am immensely grateful to Chelyaev, who took on the routine of replacing all the functions that had become obsolete since I wrote this monograph 8 (!) years ago.

I hope these lessons turned out to be useful and not boring. If you have any suggestions, bugs, complaints, thanks, etc. regarding this guide, please write to me at dranger dog gmail dot com. And no, there is no point asking me to help with your FFmpeg project. There are far too many letters like that.






Appendix 1. List of functions โ† โ‡‘ โ†’


int avformat_open_input(AVFormatContext **ptr, const char * filename, AVInputFormat *fmt, AVDictionary **options)

Opens the media file filename and stores the format context at the address pointed to by ptr.

fmt : if not NULL, forces the file format.
options : an AVDictionary filled with AVFormatContext and demuxer-private options.

void avformat_close_input(AVFormatContext **s)

Closes the media file. However, it does not close the codecs.

int avio_open2 (AVIOContext **s, const char *url, int flags, const AVIOInterruptCB *int_cb, AVDictionary **options)

Creates an I/O context for accessing the resource specified by url.

s : pointer to the place where the AVIOContext will be created . In case of failure, the specified value is set to NULL.
url : name of the resource to access.
flags : flags that control how the resource is opened.
int_cb : interrupt callback for protocol level use.
options : a dictionary populated with private protocol parameters. When the function returns, the parameter will be destroyed and replaced with a dict containing options that were not found. May be NULL.

int av_dup_packet(AVPacket *pkt)

Something of a hack: if the packet's data is not already allocated, this allocates new storage for it here. Returns 0 on success or AVERROR_NOMEM on failure.

int avformat_find_stream_info(AVFormatContext *s, AVDictionary **options)

This function probes for non-obvious stream information, such as the frame rate. It is useful for file formats with no headers, such as MPEG. It is recommended to call it after opening the file. Returns >= 0 on success, an AVERROR_* code on error.

void avcodec_free_frame(AVFrame **frame)

Old name for av_frame_free. Renamed in lavc 55.28.1.

void av_frame_free (AVFrame **frame)

Releases a frame and any dynamically allocated objects in it, for example, extended_data.

void av_free(void *ptr)

Releases memory allocated using av_malloc () or av_realloc (). You can call this function with ptr == NULL. It is recommended that you call av_freep () instead.

void av_freep(void *ptr)

Frees memory and sets the pointer to NULL. Internally uses av_free ().

void av_free_packet(AVPacket *pkt)

A wrapper around the packet's destructor (pkt->destruct).

int64_t av_gettime()

Get the current time in microseconds.

void av_init_packet(AVPacket *pkt)

Initializes the optional fields of a packet.

void *av_malloc(unsigned int size)

Allocates size bytes with an alignment suitable for all memory accesses (including vectors, if available on the CPU). av_malloc(0) must return a non-NULL pointer.

void *av_mallocz(unsigned int size)

Same as av_malloc (), but initializes memory to zero.

double av_q2d(AVRational a)

Converts an AVRational to a double.
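A two-line sketch of the typical use, converting a packet timestamp into seconds (pkt and stream are assumed to come from the demuxing code):

/* Sketch: a pts in stream time_base units becomes seconds. */
double seconds = pkt.pts * av_q2d(stream->time_base);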

int av_read_frame(AVFormatContext *s, AVPacket *pkt)

Reads the next frame of the stream and stores it as a packet in pkt.

The returned packet is valid until the next av_read_frame() or until avformat_close_input(), and must be freed with av_free_packet. For video, the packet contains exactly one frame. For audio, it contains an integer number of frames if each frame has a known fixed size (for example PCM or ADPCM data). If the audio frames have a variable size (for example MPEG audio), then it contains one frame.

pkt->pts, pkt->dts and pkt->duration are always set to the correct values in AVStream.time_base units (and guessed if the format cannot provide them). pkt->pts can be AV_NOPTS_VALUE if the video format has B-frames, so it is better to rely on pkt->dts if you do not unpack the payload.

Returns 0 if OK, < 0 on error or end of file.
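A minimal demuxing-loop sketch following these rules (pFormatCtx and video_index are assumed to come from the opening code):

AVPacket pkt;
while(av_read_frame(pFormatCtx, &pkt) >= 0) {
  if(pkt.stream_index == video_index) {
    /* hand the packet to the decoder or a queue here */
  }
  av_free_packet(&pkt); /* the caller owns the returned packet */
}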

void av_register_all();

Registers all formats and codecs in the library.

int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq)

Returns a * bq / cq .

int av_seek_frame(AVFormatContext *s, int stream_index, int64_t timestamp, int flags)

Seeks to the keyframe at timestamp.

stream_index : if stream_index is -1, a default stream is chosen and the timestamp is automatically converted from AV_TIME_BASE units to the stream-specific time_base.
timestamp : the timestamp, measured in AVStream.time_base units or, if no stream is specified, in AV_TIME_BASE units.
flags : flags selecting the direction and seeking mode:
AVSEEK_FLAG_ANY : seek to any frame, not just keyframes.
AVSEEK_FLAG_BACKWARD : seek backward.
AVSEEK_FLAG_BYTE : seek based on a position in bytes.
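A hedged sketch of a backward-biased seek to the 60-second mark, passing -1 so the timestamp is in AV_TIME_BASE units (pFormatCtx is assumed):

/* Sketch: jump to t = 60 s on the default stream. */
int64_t target = (int64_t)60 * AV_TIME_BASE;
if(av_seek_frame(pFormatCtx, -1, target, AVSEEK_FLAG_BACKWARD) < 0) {
  fprintf(stderr, "error while seeking\n");
}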

AVFrame *avcodec_alloc_frame()

Old name for av_frame_alloc. Changed in lavc 55.28.1.

AVFrame *av_frame_alloc()

Allocates an AVFrame and initializes it. Must be freed with av_frame_free().

int avcodec_decode_audio4(AVCodecContext *avctx, AVFrame *frame, int *got_frame_ptr, const AVPacket *avpkt)

Decodes an audio frame from avpkt into frame. The avcodec_decode_audio4() function decodes an audio packet from the AVPacket. It uses the audio codec that was associated with avctx via avcodec_open2(). The resulting decoded frame is stored in the given AVFrame, and got_frame_ptr is set to 1 if a frame was decompressed.

Warning: the input buffer, avpkt->data, must be FF_INPUT_BUFFER_PADDING_SIZE bytes larger than the actual bytes read, because some optimized bitstream readers read 32 or 64 bits at a time and can read past the end.

avctx : the codec context.
frame : the target frame.
got_frame_ptr : target int, set if a frame was decompressed.
avpkt : the input AVPacket containing the audio.

Returns a negative value on error, otherwise the number of bytes consumed from the input AVPacket.

int avcodec_decode_video2(AVCodecContext *avctx, AVFrame *picture, int *frameFinished, const AVPacket *avpkt)

Decodes a video frame from avpkt into picture. The avcodec_decode_video2() function decodes a video frame from the input AVPacket. It uses the video codec that was associated with avctx via avcodec_open2(). The resulting decoded frame is stored in picture.

Warning: the alignment and buffer caveats that apply to avcodec_decode_audio4 apply to this function as well.

avctx : the codec context.
picture : the AVFrame in which the decoded video will be stored.
frameFinished : zero if no frame could be decompressed, nonzero otherwise.
avpkt : the input AVPacket containing the input buffer. You can create such a packet with av_init_packet(), then set data and size; some decoders may additionally need other fields, such as flags & AV_PKT_FLAG_KEY. All decoders are designed to use as few fields as possible.

Returns a negative value on error, otherwise the number of bytes used, or zero if no frame could be decompressed.

int64_t av_frame_get_best_effort_timestamp (const AVFrame *frame)

A simple access method to get best_effort_timestamp from an AVFrame object.

AVCodec *avcodec_find_decoder(enum CodecID id)

Finds the decoder for the given CodecID. Returns NULL if none is found. Call it after obtaining the AVCodecContext from the stream in the AVFormatContext, passing codecCtx->codec_id.

void avcodec_flush_buffers(AVCodecContext *avctx)

Flushes the codec's buffers. Call it when seeking or when switching to a different stream.

AVCodecContext * avcodec_alloc_context3 (const AVCodec *codec)

Allocates an AVCodecContext and sets its fields to default values.

int avcodec_copy_context (AVCodecContext *dest, const AVCodecContext *src)

Copies the settings of the source AVCodecContext into the destination AVCodecContext. The resulting destination codec context will be unopened, i.e. you must call avcodec_open2() before you can use this AVCodecContext to decode/encode video/audio data.

dest : must have been initialized with avcodec_alloc_context3(NULL), otherwise it is uninitialized.

int avcodec_open2(AVCodecContext *avctx, AVCodec *codec, AVDictionary **options)

Initializes avctx to use the codec specified in codec . Should be used after avcodec_find_decoder. Returns zero on success and a negative value on error.

int avpicture_fill(AVPicture *picture, uint8_t *ptr, int pix_fmt, int width, int height)

Sets up the structure pointed to by picture with the buffer ptr, the format pix_fmt, and the given width and height. Returns the size of the image data in bytes.

int avpicture_get_size(int pix_fmt, int width, int height)

Calculates how many bytes are needed for an image of a given width, height and image format.
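The two functions are normally used together; here is a sketch for a YUV420P buffer (the dimensions are assumptions for illustration):

/* Sketch: allocate a raw buffer and wrap it in an AVPicture. */
int width = 640, height = 480; /* assumed dimensions */
int num_bytes = avpicture_get_size(PIX_FMT_YUV420P, width, height);
uint8_t *buffer = (uint8_t *)av_malloc(num_bytes * sizeof(uint8_t));
AVPicture pict;
avpicture_fill(&pict, buffer, PIX_FMT_YUV420P, width, height);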

struct SwsContext* sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags, SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)

Returns SwsContext for use in sws_scale.

srcW , srcH , srcFormat : the width, height, and format of the source pixels.
dstW , dstH , dstFormat : the width, height, and format of the destination pixels.
flags : the scaling method to use.
The following options are available: SWS_FAST_BILINEAR, SWS_BILINEAR, SWS_BICUBIC, SWS_X, SWS_POINT, SWS_AREA, SWS_BICUBLIN, SWS_GAUSS, SWS_SINC, SWS_LANCZOS, SWS_SPLINE.
Other flags include CPU capability flags: SWS_CPU_CAPS_MMX, SWS_CPU_CAPS_MMX2, SWS_CPU_CAPS_3DNOW, SWS_CPU_CAPS_ALTIVEC.
Other flags include (currently not fully implemented) SWS_FULL_CHR_H_INT, SWS_FULL_CHR_H_INP and SWS_DIRECT_BGR.
Finally, there is SWS_ACCURATE_RND and perhaps the most useful for beginners, SWS_PRINT_INFO.
I have no idea what most of them do. Maybe write to me?
srcFilter , dstFilter : SwsFilter for source and destination. SwsFilter enables color / brightness filtering. The default value is NULL.
param : should be a pointer to a double[2] buffer of coefficients. Not documented; it seems to be used to slightly modify the standard scaling algorithms. The default value is NULL. Experts only!

int sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[], const int srcStride[], int srcSliceY, int srcSliceH, uint8_t *const dst[], const int dstStride[])
sws_scale(sws_ctx, pFrame->data, pFrame->linesize, 0, is->video_st->codec->height, pict.data, pict.linesize);

Scales the data in srcSlice according to our settings in our SwsContext *c.
srcStride and dstStride are the source and destination row strides, in bytes.
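Putting the two together, a sketch of the usual pattern (codecCtx, pFrame, and pict are assumed to come from the decoding code; the context is created once and reused for every frame):

struct SwsContext *sws_ctx =
    sws_getContext(codecCtx->width, codecCtx->height, codecCtx->pix_fmt,
                   codecCtx->width, codecCtx->height, PIX_FMT_YUV420P,
                   SWS_BILINEAR, NULL, NULL, NULL);
sws_scale(sws_ctx, (uint8_t const * const *)pFrame->data, pFrame->linesize,
          0, codecCtx->height, pict.data, pict.linesize);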

SDL_TimerID SDL_AddTimer(Uint32 interval, SDL_NewTimerCallback callback, void *param)

Adds a callback function to be run after the specified number of milliseconds has elapsed. The callback is passed the current timer interval and the user-supplied parameter from the SDL_AddTimer call, and it returns the next timer interval. (If the returned interval is the same as the one passed in, the timer keeps firing at the same rate.) If the callback returns 0, the timer is cancelled.
Another way to cancel a running timer is to call SDL_RemoveTimer with the timer's ID (which was returned by SDL_AddTimer).

The timer callback function may run in a different thread than your main program, and so should not call any functions from within itself. However, you can always call SDL_PushEvent.

The granularity of the timer is platform-dependent, but you should count on it being at least 10 ms, as this is the most common value. This means that if you request a 16 ms timer, your callback will run after about 20 ms on an unloaded system. If you wanted to set a flag signaling a frame update at 30 frames per second (every 33 ms), you might set a timer for 30 ms (see the example below). If you use this function, you need to pass SDL_INIT_TIMER to SDL_Init.

Returns the identifier value for the added timer, or NULL if an error occurs.

The callback format:
Uint32 callback(Uint32 interval, void *param)
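Here is the example referred to above: a sketch of the 30 ms refresh pattern, which only pushes an event from the callback (the same pattern the player uses for its refresh event; timer_cb is a hypothetical name):

/* Sketch: fire roughly every 30 ms and hand the work to the main loop. */
Uint32 timer_cb(Uint32 interval, void *param) {
  SDL_Event event;
  event.type = SDL_USEREVENT;
  event.user.data1 = param;
  SDL_PushEvent(&event);   /* safe to call from the timer thread */
  return interval;         /* keep firing at the same rate */
}

/* ... after SDL_Init(SDL_INIT_TIMER | ...): */
SDL_TimerID id = SDL_AddTimer(30, timer_cb, NULL);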


int SDL_CondSignal(SDL_cond *cond)

Restarts one of the threads waiting on the condition variable cond. Returns 0 on success and -1 on error.

int SDL_CondWait(SDL_cond *cond, SDL_mutex *mut);

Unlocks the provided mutex and waits for another thread to call SDL_CondSignal or SDL_CondBroadcast on the condition variable cond, then re-locks the mutex. The mutex must be locked before entering this function. Returns 0 when signaled, or -1 on error.

SDL_cond *SDL_CreateCond(void);

Creates a condition variable.
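A sketch of the wait/signal pattern these three functions enable, in the spirit of the tutorial's packet queue (q and quit are assumed to exist in the surrounding code):

/* Consumer: sleep until there is something in the queue. */
SDL_LockMutex(q->mutex);
while(!q->first_pkt && !quit) {
  SDL_CondWait(q->cond, q->mutex); /* unlocks, sleeps, re-locks */
}
SDL_UnlockMutex(q->mutex);

/* Producer: wake one waiting consumer after enqueueing. */
SDL_CondSignal(q->cond);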

SDL_Thread *SDL_CreateThread(int (*fn)(void *), void *data);

SDL_CreateThread creates a new thread of execution that shares all of its parent's global memory, signal handlers, file descriptors, etc., and runs the function fn, passing it the void pointer data. The thread terminates when fn returns.

void SDL_Delay(Uint32 ms)

Waits the specified number of milliseconds. SDL_Delay will wait at least the specified time, but possibly longer due to OS scheduling.
Note: count on a delay granularity of at least 10 ms. Some platforms have shorter clock ticks, but this is the most common value.

SDL_Overlay *SDL_CreateYUVOverlay(int width, int height, Uint32 format, SDL_Surface *display);

SDL_CreateYUVOverlay creates a YUV overlay of the specified width, height and format (for a list of available formats, see the SDL_Overlay data structure) for the provided display. Returns SDL_Overlay.

display should actually be the surface obtained from SDL_SetVideoMode; otherwise this function's behavior is undefined.

The term "overlay" is a misnomer, because unless the overlay is created in hardware, the contents of the display surface underneath the area where the overlay is shown will be overwritten when the overlay is displayed.

int SDL_LockYUVOverlay(SDL_Overlay *overlay)

SDL_LockYUVOverlay locks the overlay for direct access to its pixel data. Returns 0 on success, -1 on error.

void SDL_UnlockYUVOverlay(SDL_Overlay *overlay)

Unlocks a previously locked overlay. The overlay must be unlocked before it can be displayed.

int SDL_DisplayYUVOverlay(SDL_Overlay *overlay, SDL_Rect *dstrect)

Blits the overlay onto the surface specified when it was created. The SDL_Rect structure dstrect determines the position and size of the destination. If dstrect is larger or smaller than the overlay, the overlay is scaled to fit it; 2x scaling is optimized. Returns 0 on success.

void SDL_FreeYUVOverlay(SDL_Overlay *overlay)

Releases the overlay created by SDL_CreateYUVOverlay.

int SDL_Init(Uint32 flags);

Initializes the SDL. This should be called before all other SDL functions. The flags parameter specifies which parts of the SDL to initialize.

SDL_INIT_TIMER - initializes the timer subsystem.
SDL_INIT_AUDIO - initializes the audio subsystem.
SDL_INIT_VIDEO - initializes the video subsystem.
SDL_INIT_CDROM - initializes the CD-ROM subsystem.
SDL_INIT_JOYSTICK - initializes the joystick subsystem.
SDL_INIT_EVERYTHING - Initializes all of the above.
SDL_INIT_NOPARACHUTE - does not allow SDL to catch fatal errors.
SDL_INIT_EVENTTHREAD - launches the event manager in a separate thread.

Returns -1 on error or 0 on success. You can get an extended error message by calling SDL_GetError. A typical cause of error is using a particular display without the corresponding subsystem support, for example a missing mouse driver when using a framebuffer device. In that case you can either compile SDL without the mouse, or set the environment variable "SDL_NOMOUSE=1" before starting the application.

SDL_mutex *SDL_CreateMutex(void);

Creates a new, unlocked mutex.

int SDL_LockMutex(SDL_mutex *mutex)

SDL_LockMutex is an alias for SDL_mutexP. It locks a mutex previously created with SDL_CreateMutex. If the mutex is already locked by another thread, SDL_mutexP does not return until the thread holding it unlocks it (with SDL_mutexV). If you lock the mutex repeatedly, SDL_mutexV (aka SDL_UnlockMutex) must be called an equal number of times to return the mutex to the unlocked state. Returns 0 on success, -1 on error.

int SDL_UnlockMutex(SDL_mutex *mutex)

Unlocks the mutex.

int SDL_OpenAudio(SDL_AudioSpec *desired, SDL_AudioSpec *obtained)

This function opens the audio device with the desired parameters and returns 0 on success, placing the actual hardware parameters into the structure pointed to by obtained. If obtained is NULL, the audio data passed to the callback function is guaranteed to be in the requested format and will, if necessary, be automatically converted to the hardware audio format. This function returns -1 if the audio device could not be opened or the audio thread could not be set up.

To open the audio device you need to create the desired SDL_AudioSpec and fill it in with your desired audio specifications.

desired->freq : the desired audio frequency in samples per second.
desired->format : the desired audio format (see SDL_AudioSpec).
desired->channels : the desired number of channels (1 mono, 2 stereo, 4 surround, 6 surround with center and LFE).
desired->samples : the desired audio buffer size in samples. This number must be a power of two and may be adjusted by the audio driver to a value better suited to the hardware. Good values range from 512 to 8192 inclusive, depending on the application and CPU speed. Smaller values give faster response times but can lead to poor performance if the application does heavy processing and cannot fill the audio buffer in time. A stereo sample consists of right and left channels in LR ordering. Note that the number of samples is directly related to time by the formula ms = (samples * 1000) / freq.
desired->callback : set to a function that will be called when the audio device is ready for more data. The function is passed a pointer to the audio buffer and the length of that buffer in bytes. It usually runs in a separate thread, so you must protect the data structures it accesses with SDL_LockAudio and SDL_UnlockAudio in your code. The callback prototype is void callback(void *userdata, Uint8 *stream, int len). userdata is the pointer stored in the userdata field of the SDL_AudioSpec; stream is a pointer to the audio buffer you want to fill with data; len is the length of the audio buffer in bytes.
desired->userdata : this pointer is passed as the first parameter to the callback function.

SDL_OpenAudio reads these fields from the desired SDL_AudioSpec structure passed to the function and tries to find an audio configuration matching your request. As mentioned above, if obtained is NULL, SDL converts from the desired audio settings to the hardware settings during playback.

If obtained is NULL, the desired SDL_AudioSpec is your working specification; otherwise obtained becomes the working specification and desired can be discarded. The data in the working specification is used when building an SDL_AudioCVT for converting loaded data into the hardware format.

SDL_OpenAudio calculates the size and silence fields for both the desired and obtained specifications. The size field stores the total size of the audio buffer in bytes, while silence stores the value used to represent silence in the audio buffer.

The audio device starts out playing silence when it is opened, and should be enabled for playing by calling SDL_PauseAudio(0) when you are ready for your audio callback to be called. Since the audio driver may modify the requested size of the audio buffer, you should allocate any local mixing buffers after you open the audio device.
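A condensed sketch of the open sequence described above (the 44100 Hz rate, the audio_callback function, and the is pointer are assumptions mirroring the tutorial's code):

SDL_AudioSpec wanted_spec, spec;
wanted_spec.freq = 44100;              /* assumed sample rate */
wanted_spec.format = AUDIO_S16SYS;
wanted_spec.channels = 2;
wanted_spec.silence = 0;
wanted_spec.samples = 1024;
wanted_spec.callback = audio_callback; /* your callback */
wanted_spec.userdata = is;
if(SDL_OpenAudio(&wanted_spec, &spec) < 0) {
  fprintf(stderr, "SDL_OpenAudio: %s\n", SDL_GetError());
}
/* spec.size now holds the real hardware buffer size in bytes */
SDL_PauseAudio(0); /* start the callback rolling */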

void SDL_PauseAudio(int pause_on)

This function pauses and unpauses the audio callback processing. It should be called with pause_on = 0 after opening the audio device to start playing sound. This lets you safely initialize the data for your callback function after opening the audio device. Silence is written to the audio device while it is paused.

int SDL_PushEvent(SDL_Event *event)

Pushes an event onto the event queue. The queue is actually used as a two-way communication channel: not only can events be read from it, the user can also push their own events onto it. event is a pointer to the event structure you want to push. The event is copied into the queue, and the caller may dispose of the memory pointed to after SDL_PushEvent returns. This function is thread-safe and can safely be called from other threads. Returns 0 on success, or -1 if the event could not be pushed.

int SDL_WaitEvent(SDL_Event *event)

Waits indefinitely for the next available event. Returns 0 if there was an error while waiting for events, 1 otherwise. If event is not NULL, the next event is removed from the queue and stored in it.

void SDL_Quit()

Shuts down all SDL subsystems and frees the resources allocated to them. This should always be called before you exit.

SDL_Surface *SDL_SetVideoMode(int width, int height, int bitsperpixel, Uint32 flags)

Sets the video mode with the specified width, height, and bits per pixel. As of SDL 1.2.10, if width and height are both 0, the width and height of the current video mode (or of the desktop, if no mode has been set) are used. If bitsperpixel is 0, it is treated as the current display's bits per pixel. The flags parameter is the same as the flags field of the SDL_Surface structure, an OR'd combination of the following values:

SDL_SWSURFACE - create a video surface in the system memory.
SDL_HWSURFACE - create a video surface in video memory.
SDL_ASYNCBLIT - enable the use of asynchronous updates to the display surface. This usually slows down work on single-processor computers, but can increase speed in SMP systems.
SDL_ANYFORMAT - normally, if a video surface with the requested bits per pixel (bpp) is not available, SDL emulates one with a shadow surface. Passing SDL_ANYFORMAT prevents this and makes SDL use the video surface regardless of its pixel depth.
SDL_HWPALETTE - gives SDL exclusive access to the palette. Without this flag you cannot always get the colors you request with SDL_SetColors or SDL_SetPalette.
SDL_DOUBLEBUF - enables hardware double buffering; valid only with SDL_HWSURFACE. Calling SDL_Flip flips the buffers and updates the screen. All drawing takes place on the surface that is not currently displayed. If double buffering cannot be enabled, SDL_Flip simply performs a full-screen SDL_UpdateRect.
SDL_FULLSCREEN - SDL attempts to use a fullscreen mode. If a hardware resolution change is not possible (for whatever reason), the next higher resolution is used and the display window is centered on a black background.
SDL_OPENGL - creates an OpenGL rendering context. It is assumed that the OpenGL video attributes have already been set with SDL_GL_SetAttribute.
SDL_OPENGLBLIT - creates an OpenGL rendering context, as above, but allows normal blitting operations. The screen (2D) surface may have an alpha channel, and SDL_UpdateRects must be used to update changes to the screen surface. NOTE: this option is kept for compatibility only and will be removed in future versions. Not recommended for new code.
SDL_RESIZABLE - creates a resizable window. When the window is resized by the user, an SDL_VIDEORESIZE event is generated, and SDL_SetVideoMode can be called again with the new size.
SDL_NOFRAME - if possible, SDL_NOFRAME makes SDL create a window with no title bar or frame decoration. This flag is automatically set in fullscreen mode.
Note: whichever of these flags SDL_SetVideoMode could satisfy is set in the flags member of the returned surface.
NOTE: a bitsperpixel of 24 uses the packed representation of 3 bytes per pixel. For the more common 4-bytes-per-pixel mode, use a bitsperpixel of 32. Oddly enough, both 15 and 16 request a 2-bytes-per-pixel mode, but with different pixel formats.
NOTE: use SDL_SWSURFACE if you plan on doing per-pixel manipulations or blitting surfaces with alpha channels and require a high frame rate. When you use hardware surfaces (SDL_HWSURFACE), SDL copies the surfaces from video memory to system memory when you lock them, and back when you unlock them, which can cause a significant performance hit. (Be aware that you may request a hardware surface but still get a software surface; many platforms can only provide a hardware surface when using SDL_FULLSCREEN.) SDL_HWSURFACE is best used when the surfaces you will be blitting can also be stored in video memory.
NOTE: if you want to control the position of the window on the screen when creating a windowed surface, you can do so by setting the environment variables "SDL_VIDEO_CENTERED=center" or "SDL_VIDEO_WINDOW_POS=x,y". You can set them via SDL_putenv.

Return value: the framebuffer surface, or NULL on failure. The returned surface is freed by SDL_Quit and must not be freed by the caller.
NOTE: this rule includes consecutive calls to SDL_SetVideoMode (i.e. resizing); the existing surface is freed automatically.






Appendix 2. Data Structures โ† โ‡‘ โ†’



AVCodecContext

All information about the codec from the stream, from AVStream-> codec. Some important attributes:

AVRational time_base : frames per second.
int sample_rate : samples per second.
int channels : number of channels.

See the full list (very impressive) here (via the web archive, since the original link no longer exists). Many of the parameters are used mainly for encoding rather than decoding.

AVFormatContext

Data fields:

const AVClass * av_class
AVInputFormat * iformat
AVOutputFormat * oformat
void * priv_data :
ByteIOContext pb : used for low-level file manipulation.
unsigned int nb_streams : number of streams in the file.
AVStream * streams [MAX_STREAMS] : data for each stream is stored here.
char filename [1024] : well, where would we be without it (in the original: duh).

File Information:
int64_t timestamp :
char title [512]:
char author [512]:
char copyright [512]:
char comment [512]:
char album [512]:
int year :
int track :
char genre [32]:

int ctx_flags :
Possible values are AVFMT_NOFILE, AVFMT_NEEDNUMBER, AVFMT_SHOW_IDS, AVFMT_RAWPICTURE, AVFMT_GLOBALHEADER, AVFMT_NOTIMESTAMPS, AVFMT_GENERIC_INDEX
AVPacketList * packet_buffer : This buffer is needed only when packets are already buffered but not decoded, for example, to receive codec parameters in mpeg streams.
int64_t start_time : decoding: position of the first frame of the component, in AV_TIME_BASE fractional seconds. NEVER set this value directly: it is deduced from the AVStream values.
int64_t duration : decoding: duration of the stream, in AV_TIME_BASE fractional seconds. NEVER set this value directly: it is deduced from the AVStream values.
int64_t file_size : total file size, 0 if unknown.
int bit_rate : decoding: total stream bitrate in bit/s, 0 if not available. NEVER set it directly: ffmpeg can compute it automatically if file_size and duration are known.
AVStream * cur_st
const uint8_t * cur_ptr
int cur_len
AVPacket cur_pkt :
int64_t data_offset : offset of the first packet.
int index_built :
int mux_rate :
int packet_size :
int preload :
int max_delay :
int loop_output : number of times to loop the output in formats that support it.
int flags :
int loop_input :
unsigned int probesize : decoding: size of the data to probe; not used in encoding.
int max_analyze_duration : maximum duration, in AV_TIME_BASE units, over which the input should be analyzed in avformat_find_stream_info().
const uint8_t * key :
int keylen :

AVIOContext

I / O context for accessing resources.

const AVClass * av_class : class for private settings.
unsigned char * buffer : start of buffer.
int buffer_size : maximum buffer size.
unsigned char * buf_ptr : current position in the buffer.
unsigned char * buf_end : the end of the data; it may be less than buffer + buffer_size if the read function returned less data than requested, for example.
void * opaque : private pointer passed to read / write / search / ...
int (* read_packet) (void * opaque, uint8_t * buf, int buf_size) :
int (* write_packet) (void * opaque, uint8_t * buf, int buf_size ) :
int64_t (* seek) (void * opaque, int64_t offset, int whence) :
int64_t pos : position in the file of the current buffer.
int must_flush : true if the next seek should flush.
int eof_reached : true if end of file is reached.
int write_flag : true if open for writing.
int max_packet_size :
unsigned long checksum :
unsigned char * checksum_ptr :
unsigned long (* update_checksum) (unsigned long checksum, const uint8_t * buf, unsigned int size) :
int error : contains the error code or 0 if no error occurred.
int (* read_pause) (void * opaque, int pause): pause or resume playback for network streaming protocols, for example.
int64_t (* read_seek) (void * opaque, int stream_index, int64_t timestamp, int flags) : seeks to the given timestamp in the stream with the given stream_index.
int seekable : a combination of AVIO_SEEKABLE_ flags, or 0 when the stream is not seekable.
int64_t maxsize : maximum file size, used to limit allocations. This field is internal to libavformat; access from outside is not allowed.
int direct : avio_read and avio_write should, if possible, be satisfied directly instead of going through the buffer, and avio_seek always calls the underlying seek function directly.
int64_t bytes_read : bytes-read statistics. This field is internal to libavformat; access from outside is not allowed.
int seek_count : seek statistics. This field is internal to libavformat; access from outside is not allowed.
int writeout_count : write statistics. This field is internal to libavformat; access from outside is not allowed.
int orig_buffer_size : the original buffer size, used internally to check and ensure the buffer size can be reset. This field is internal to libavformat; access from outside is not allowed.

AVDictionary

Used to pass parameters to ffmpeg.

int count :
AVDictionaryEntry * elems :

AVDictionaryEntry

Used to store dictionary entries in AVDictionary.

char * key :
char * value :

AVFrame

This structure depends on the type of codec and therefore is determined dynamically. However, there are common properties and methods for this structure:

uint8_t * data [4] :
int linesize [4] : stride information.
uint8_t * base [4] :
int key_frame :
int pict_type :
int64_t pts : this is not the pts value you want when decoding.
int coded_picture_number :
int display_picture_number :
int quality :
int age :
int reference :
int8_t * qscale_table :
int qstride :
uint8_t * mbskip_table :
int16_t (* motion_val [2]) [2]:
uint32_t * mb_type :
uint8_t motion_subsample_log2 :
void * opaque : user data
uint64_t error [4] :
int type :
int repeat_pict : indicates that the picture must be repeated the given number of extra times.
int qscale_type :
int interlaced_frame :
int top_field_first :
AVPanScan * pan_scan :
int palette_has_changed :
int buffer_hints :
short * dct_coeff :
int8_t * ref_index [2] :

AVPacket

The structure in which raw packet data is stored. This data must be passed to avcodec_decode_audio4 or avcodec_decode_video2 in order to get a frame.

int64_t pts : presentation timestamp in time_base units.
int64_t dts : time stamp of decompression in units of time_base.
uint8_t * data : raw data.
int size : data size.
int stream_index : the stream this AVPacket came from, an index into the AVFormatContext streams array.
int flags : PKT_FLAG_KEY is set if the packet is a key frame.
int duration : duration of the presentation in units of time_base (0 if not available)
void (* destruct) (struct AVPacket *) : deallocation function for this packet (av_destruct_packet by default).
void * priv :
int64_t pos : byte position in the stream, -1 if unknown.

AVPacketList

A simple linked list of packets.

AVPacket pkt :
AVPacketList * next :

AVPicture

This structure is exactly the same as the first two data members of AVFrame, so AVFrame is often cast to it. Commonly used in the SWS functions.

uint8_t * data [4] :
int linesize [4] : the number of bytes in the string.

AVRational

Simple structure for representing rational numbers.

int num : numerator.
int den : denominator.

AVStream

The structure for a stream. You will probably use the information in codec most often.

int index :
int id :
AVCodecContext * codec :
AVRational r_frame_rate :
void * priv_data :
int64_t codec_info_duration :
int codec_info_nb_frames :
AVFrac pts :
AVRational time_base :
int pts_wrap_bits :
int stream_copy :
enum AVDiscard discard : lets you select which packets can be discarded at will, since they do not need to be demuxed.
float quality :
int64_t start_time :
int64_t duration:
char language [4] :
int need_parsing : 1 -> full parsing needed, 2 -> only parse headers, no repacking.
AVCodecParserContext * parser :
int64_t cur_dts :
int last_IP_duration :
int64_t last_IP_pts :
AVIndexEntry * index_entries :
int nb_index_entries :
unsigned int index_entries_allocated_size :
int64_t nb_frames : number of frames in this stream (if known) or 0
int64_t pts_buffer [MAX_REORDER_DELAY + 1] :

ByteIOContext

A structure that stores low-level information about a movie file.

unsigned char * buffer :
int buffer_size :
unsigned char * buf_ptr :
unsigned char * buf_end :
void * opaque :
int (* read_packet) (void * opaque, uint8_t * buf, int buf_size) :
int (* write_packet) (void * opaque, uint8_t * buf, int buf_size) :
offset_t (* seek) (void * opaque, offset_t offset, int whence) :
offset_t pos :
int must_flush :
int eof_reached :
int write_flag :
int is_streamed :
int max_packet_size :
unsigned long checksum :
unsigned char * checksum_ptr :
unsigned long (* update_checksum) (unsigned long checksum, const uint8_t * buf, unsigned int size) :
int error : contains the error code or 0 if no error occurred.

SDL_AudioSpec

Used to describe the format of some audio data.

freq : sound frequency in samples per second.
format : audio data format.
channels : number of channels: 1 - mono, 2 - stereo, 4 surround, 6 surround with centering and LFE
silence : silence value of the sound buffer (calculated).
samples : audio buffer size in samples.
size : The size of the audio buffer in bytes (calculated).
callback (..) : callback function to fill the audio buffer.
userdata : a pointer to user data that is passed to the callback function.

The following format values are valid:

AUDIO_U8 - unsigned 8-bit samples.
AUDIO_S8 - signed 8-bit samples.
AUDIO_U16 or AUDIO_U16LSB - not supported by all hardware (unsigned 16-bit little-endian).
AUDIO_S16 or AUDIO_S16LSB - not supported by all hardware (signed 16-bit little-endian).
AUDIO_U16MSB - not supported by all hardware (unsigned 16-bit big-endian).
AUDIO_S16MSB - not supported by all hardware (signed 16-bit big-endian).
AUDIO_U16SYS : either AUDIO_U16LSB or AUDIO_U16MSB, depending on the CPU byte order.
AUDIO_S16SYS : either AUDIO_S16LSB or AUDIO_S16MSB, depending on the CPU byte order.

SDL_Event

The basic structure for events.

type : type of event.
active : activation event (see SDL_ActiveEvent).
key : keyboard event (see SDL_KeyboardEvent).
motion : mouse movement event (see SDL_MouseMotionEvent).
button : mouse click event (see SDL_MouseButtonEvent).
jaxis : joystick axis movement event (see SDL_JoyAxisEvent).
jball : joystick trackball movement event (see SDL_JoyBallEvent).
jhat : joystick hat motion event (see SDL_JoyHatEvent).
jbutton : event of pressing the joystick button (see SDL_JoyButtonEvent).
resize: Application window resize event (see SDL_ResizeEvent).
expose : application window open event (see SDL_ExposeEvent).
quit : application exit request event (see SDL_QuitEvent).
user : user event (see SDL_UserEvent).
syswm : undefined window manager event (see SDL_SysWMEvent).

Here are the event types. See the SDL documentation for more information:

SDL_ACTIVEEVENT - SDL_ActiveEvent
SDL_KEYDOWN/UP - SDL_KeyboardEvent
SDL_MOUSEMOTION - SDL_MouseMotionEvent
SDL_MOUSEBUTTONDOWN/UP - SDL_MouseButtonEvent
SDL_JOYAXISMOTION - SDL_JoyAxisEvent
SDL_JOYBALLMOTION - SDL_JoyBallEvent
SDL_JOYHATMOTION - SDL_JoyHatEvent
SDL_JOYBUTTONDOWN/UP - SDL_JoyButtonEvent
SDL_VIDEORESIZE - SDL_ResizeEvent
SDL_VIDEOEXPOSE - SDL_ExposeEvent
SDL_QUIT - SDL_QuitEvent
SDL_USEREVENT - SDL_UserEvent
SDL_SYSWMEVENT - SDL_SysWMEvent

SDL_Overlay

YUV overlay.

format : overlay format (see below).
w, h : Width / height of the overlay.
planes : the number of planes in the overlay. Usually either 1 or 3.
pitches : an array of pitches, one for each plane. A pitch is the length of a row in bytes.
pixels : an array of pointers to the data of each plane. The overlay must be locked before these pointers are used.
hw_overlay : Set to 1 if overlay is hardware accelerated.

SDL_Rect

The rectangular area.

Sint16 x, y : position of the upper left corner of the rectangle.
Uint16 w, h : the width and height of the rectangle.

SDL_Rect defines a rectangular area of pixels. It is used by SDL_BlitSurface to define blitting regions and by several other video functions.

SDL_Surface

Graphical surface structure.

Uint32 flags : surface flags. Read-only.
SDL_PixelFormat * format : read-only.
int w, h : width and height. Read-only.
Uint16 pitch : the surface pitch (row stride in bytes). Read-only.
void * pixels : a pointer to the actual pixel data. Read-write.
SDL_Rect clip_rect : the surface clip rectangle. Read-only.
int refcount : reference count, used when freeing the surface. Mostly read-only.
This structure also contains private fields not shown here.

SDL_Surface represents an area of "graphical" memory that can be drawn to. The video framebuffer is returned as an SDL_Surface by SDL_SetVideoMode and SDL_GetVideoSurface. The w and h fields are the width and height of the surface in pixels. The pixels field is a pointer to the actual pixel data. Note: the surface must be locked (via SDL_LockSurface) before accessing this field. The clip_rect field is the clipping rectangle set by SDL_SetClipRect.

The flags field supports the following OR'd values:

SDL_SWSURFACE - the surface is stored in system memory.
SDL_HWSURFACE - the surface is stored in video memory.
SDL_ASYNCBLIT - the surface uses asynchronous blits, if possible.
SDL_ANYFORMAT - any pixel format is allowed (display surface).
SDL_HWPALETTE - the surface has an exclusive palette.
SDL_DOUBLEBUF - the surface is double-buffered (display surface).
SDL_FULLSCREEN - the surface is fullscreen (display surface).
SDL_OPENGL - the surface has an OpenGL context (display surface).
SDL_OPENGLBLIT - the surface supports OpenGL blitting (display surface). NOTE: this option is kept for compatibility only and is not recommended for new code.
SDL_RESIZABLE - the surface is resizable (display surface).
SDL_HWACCEL - surface blits use hardware acceleration.
SDL_SRCCOLORKEY - the surface uses colorkey blitting.
SDL_RLEACCEL - colorkey blitting is accelerated with RLE.
SDL_SRCALPHA - surface blits use alpha blending.
SDL_PREALLOC - the surface uses preallocated memory.

SDL_Thread

This structure is system-dependent, and you probably should not use it. See src/thread/SDL_thread_c.h in the source code for more information.

SDL_cond

This structure is system-dependent, and you probably should not use it. See src/thread/<system>/SDL_syscond.c in the source code for more information.

SDL_mutex

This structure is system-dependent, and you probably should not use it. See src/thread/<system>/SDL_sysmutex.c in the source code for more information.






Links โ† โ‡‘


An FFmpeg and SDL Tutorial or How to Write a Video Player in Less Than 1000 Lines

Official sites of FFmpeg and SDL: FFmpeg HomePage, SDL HomePage










