ffmpeg: libavformat/libswresample to transcode and resample at same time - audio

I want to transcode and down/re-sample the audio for output using ffmpeg's libav*/libswresample - I am using ffmpeg's (4.x) transcode_aac.c and resample_audio.c as reference - but the code produces audio with glitches that is clearly not what ffmpeg itself would produce (ie ffmpeg -i foo.wav -ar 22050 foo.m4a)
Based on the ffmpeg examples, to resample audio it appears that I need to set the output AVAudioContext and SwrContext sample_rate to what I desire and ensure the swr_convert() is provided with the correct number of output samples based av_rescale_rnd( swr_delay(), ...) once I have an decoded input audio. I've taken care to ensure all the relevant calculations of samples for output are taken into account in the merged code (below):
open_output_file() - AVCodecContext.sample_rate (avctx variable) set to our target (down sampled) sample_rate
read_decode_convert_and_store() is where the work happens: input audio is decoded to an AVFrame and this input frame is converted before being encoded.
init_converted_samples() and av_samples_alloc() uses the input frame's nb_samples
ADDED: calc the number of output samples via av_rescale_rnd() and swr_delay()
UPDATED: convert_samples() and swr_convert() uses the input frame's samples and our calculated output samples as parameters
However the resulting audio file is produced with audio glitches. Does the community know of any references for how transcode AND resample should be done or what is missing in this example?
/* compile and run:
gcc -I/usr/include/ffmpeg transcode-swr-aac.c -lavformat -lavutil -lavcodec -lswresample -lm
./a.out foo.wav foo.m4a
*/
/*
* Copyright (c) 2013-2018 Andreas Unterweger
*
* This file is part of FFmpeg.
... ...
*
* #example transcode_aac.c
* Convert an input audio file to AAC in an MP4 container using FFmpeg.
* Formats other than MP4 are supported based on the output file extension.
* #author Andreas Unterweger (xxxx#xxxxx.com)
*/
#include <stdio.h>
#include "libavformat/avformat.h"
#include "libavformat/avio.h"
#include "libavcodec/avcodec.h"
#include "libavutil/audio_fifo.h"
#include "libavutil/avassert.h"
#include "libavutil/avstring.h"
#include "libavutil/channel_layout.h"
#include "libavutil/frame.h"
#include "libavutil/opt.h"
#include "libswresample/swresample.h"
#define OUTPUT_BIT_RATE 128000
#define OUTPUT_CHANNELS 2
static int open_input_file(const char *filename,
AVFormatContext **input_format_context,
AVCodecContext **input_codec_context)
{
AVCodecContext *avctx;
const AVCodec *input_codec;
const AVStream *stream;
int error;
if ((error = avformat_open_input(input_format_context, filename, NULL,
NULL)) < 0) {
fprintf(stderr, "Could not open input file '%s' (error '%s')\n",
filename, av_err2str(error));
*input_format_context = NULL;
return error;
}
if ((error = avformat_find_stream_info(*input_format_context, NULL)) < 0) {
fprintf(stderr, "Could not open find stream info (error '%s')\n",
av_err2str(error));
avformat_close_input(input_format_context);
return error;
}
if ((*input_format_context)->nb_streams != 1) {
fprintf(stderr, "Expected one audio input stream, but found %d\n",
(*input_format_context)->nb_streams);
avformat_close_input(input_format_context);
return AVERROR_EXIT;
}
stream = (*input_format_context)->streams[0];
if (!(input_codec = avcodec_find_decoder(stream->codecpar->codec_id))) {
fprintf(stderr, "Could not find input codec\n");
avformat_close_input(input_format_context);
return AVERROR_EXIT;
}
avctx = avcodec_alloc_context3(input_codec);
if (!avctx) {
fprintf(stderr, "Could not allocate a decoding context\n");
avformat_close_input(input_format_context);
return AVERROR(ENOMEM);
}
/* Initialize the stream parameters with demuxer information. */
error = avcodec_parameters_to_context(avctx, stream->codecpar);
if (error < 0) {
avformat_close_input(input_format_context);
avcodec_free_context(&avctx);
return error;
}
/* Open the decoder for the audio stream to use it later. */
if ((error = avcodec_open2(avctx, input_codec, NULL)) < 0) {
fprintf(stderr, "Could not open input codec (error '%s')\n",
av_err2str(error));
avcodec_free_context(&avctx);
avformat_close_input(input_format_context);
return error;
}
/* Set the packet timebase for the decoder. */
avctx->pkt_timebase = stream->time_base;
/* Save the decoder context for easier access later. */
*input_codec_context = avctx;
return 0;
}
static int open_output_file(const char *filename,
AVCodecContext *input_codec_context,
AVFormatContext **output_format_context,
AVCodecContext **output_codec_context)
{
AVCodecContext *avctx = NULL;
AVIOContext *output_io_context = NULL;
AVStream *stream = NULL;
const AVCodec *output_codec = NULL;
int error;
if ((error = avio_open(&output_io_context, filename,
AVIO_FLAG_WRITE)) < 0) {
fprintf(stderr, "Could not open output file '%s' (error '%s')\n",
filename, av_err2str(error));
return error;
}
if (!(*output_format_context = avformat_alloc_context())) {
fprintf(stderr, "Could not allocate output format context\n");
return AVERROR(ENOMEM);
}
(*output_format_context)->pb = output_io_context;
if (!((*output_format_context)->oformat = av_guess_format(NULL, filename,
NULL))) {
fprintf(stderr, "Could not find output file format\n");
goto cleanup;
}
if (!((*output_format_context)->url = av_strdup(filename))) {
fprintf(stderr, "Could not allocate url.\n");
error = AVERROR(ENOMEM);
goto cleanup;
}
if (!(output_codec = avcodec_find_encoder(AV_CODEC_ID_AAC))) {
fprintf(stderr, "Could not find an AAC encoder.\n");
goto cleanup;
}
/* Create a new audio stream in the output file container. */
if (!(stream = avformat_new_stream(*output_format_context, NULL))) {
fprintf(stderr, "Could not create new stream\n");
error = AVERROR(ENOMEM);
goto cleanup;
}
avctx = avcodec_alloc_context3(output_codec);
if (!avctx) {
fprintf(stderr, "Could not allocate an encoding context\n");
error = AVERROR(ENOMEM);
goto cleanup;
}
/* Set the basic encoder parameters.
* SET OUR DESIRED output sample_rate here
*/
avctx->channels = OUTPUT_CHANNELS;
avctx->channel_layout = av_get_default_channel_layout(OUTPUT_CHANNELS);
// avctx->sample_rate = input_codec_context->sample_rate;
avctx->sample_rate = 22050;
avctx->sample_fmt = output_codec->sample_fmts[0];
avctx->bit_rate = OUTPUT_BIT_RATE;
avctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
/* Set the sample rate for the container. */
stream->time_base.den = avctx->sample_rate;
stream->time_base.num = 1;
if ((*output_format_context)->oformat->flags & AVFMT_GLOBALHEADER)
avctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
if ((error = avcodec_open2(avctx, output_codec, NULL)) < 0) {
fprintf(stderr, "Could not open output codec (error '%s')\n",
av_err2str(error));
goto cleanup;
}
error = avcodec_parameters_from_context(stream->codecpar, avctx);
if (error < 0) {
fprintf(stderr, "Could not initialize stream parameters\n");
goto cleanup;
}
/* Save the encoder context for easier access later. */
*output_codec_context = avctx;
return 0;
cleanup:
avcodec_free_context(&avctx);
avio_closep(&(*output_format_context)->pb);
avformat_free_context(*output_format_context);
*output_format_context = NULL;
return error < 0 ? error : AVERROR_EXIT;
}
/**
* Initialize one data packet for reading or writing.
*/
static int init_packet(AVPacket **packet)
{
if (!(*packet = av_packet_alloc())) {
fprintf(stderr, "Could not allocate packet\n");
return AVERROR(ENOMEM);
}
return 0;
}
static int init_input_frame(AVFrame **frame)
{
if (!(*frame = av_frame_alloc())) {
fprintf(stderr, "Could not allocate input frame\n");
return AVERROR(ENOMEM);
}
return 0;
}
static int init_resampler(AVCodecContext *input_codec_context,
AVCodecContext *output_codec_context,
SwrContext **resample_context)
{
int error;
/**
* create the resample, including ref to the desired output sample rate
*/
*resample_context = swr_alloc_set_opts(NULL,
av_get_default_channel_layout(output_codec_context->channels),
output_codec_context->sample_fmt,
output_codec_context->sample_rate,
av_get_default_channel_layout(input_codec_context->channels),
input_codec_context->sample_fmt,
input_codec_context->sample_rate,
0, NULL);
if (!*resample_context < 0) {
fprintf(stderr, "Could not allocate resample context\n");
return AVERROR(ENOMEM);
}
if ((error = swr_init(*resample_context)) < 0) {
fprintf(stderr, "Could not open resample context\n");
swr_free(resample_context);
return error;
}
return 0;
}
static int init_fifo(AVAudioFifo **fifo, AVCodecContext *output_codec_context)
{
if (!(*fifo = av_audio_fifo_alloc(output_codec_context->sample_fmt,
output_codec_context->channels, 1))) {
fprintf(stderr, "Could not allocate FIFO\n");
return AVERROR(ENOMEM);
}
return 0;
}
static int write_output_file_header(AVFormatContext *output_format_context)
{
int error;
if ((error = avformat_write_header(output_format_context, NULL)) < 0) {
fprintf(stderr, "Could not write output file header (error '%s')\n",
av_err2str(error));
return error;
}
return 0;
}
static int decode_audio_frame(AVFrame *frame,
AVFormatContext *input_format_context,
AVCodecContext *input_codec_context,
int *data_present, int *finished)
{
AVPacket *input_packet;
int error;
error = init_packet(&input_packet);
if (error < 0)
return error;
*data_present = 0;
*finished = 0;
if ((error = av_read_frame(input_format_context, input_packet)) < 0) {
if (error == AVERROR_EOF)
*finished = 1;
else {
fprintf(stderr, "Could not read frame (error '%s')\n",
av_err2str(error));
goto cleanup;
}
}
if ((error = avcodec_send_packet(input_codec_context, input_packet)) < 0) {
fprintf(stderr, "Could not send packet for decoding (error '%s')\n",
av_err2str(error));
goto cleanup;
}
error = avcodec_receive_frame(input_codec_context, frame);
if (error == AVERROR(EAGAIN)) {
error = 0;
goto cleanup;
} else if (error == AVERROR_EOF) {
*finished = 1;
error = 0;
goto cleanup;
} else if (error < 0) {
fprintf(stderr, "Could not decode frame (error '%s')\n",
av_err2str(error));
goto cleanup;
} else {
*data_present = 1;
goto cleanup;
}
cleanup:
av_packet_free(&input_packet);
return error;
}
static int init_converted_samples(uint8_t ***converted_input_samples,
AVCodecContext *output_codec_context,
int frame_size)
{
int error;
if (!(*converted_input_samples = calloc(output_codec_context->channels,
sizeof(**converted_input_samples)))) {
fprintf(stderr, "Could not allocate converted input sample pointers\n");
return AVERROR(ENOMEM);
}
if ((error = av_samples_alloc(*converted_input_samples, NULL,
output_codec_context->channels,
frame_size,
output_codec_context->sample_fmt, 0)) < 0) {
fprintf(stderr,
"Could not allocate converted input samples (error '%s')\n",
av_err2str(error));
av_freep(&(*converted_input_samples)[0]);
free(*converted_input_samples);
return error;
}
return 0;
}
static int convert_samples(const uint8_t **input_data, const int input_nb_samples,
uint8_t **converted_data, const int output_nb_samples,
SwrContext *resample_context)
{
int error;
if ((error = swr_convert(resample_context,
converted_data, output_nb_samples,
input_data , input_nb_samples)) < 0) {
fprintf(stderr, "Could not convert input samples (error '%s')\n",
av_err2str(error));
return error;
}
return 0;
}
static int add_samples_to_fifo(AVAudioFifo *fifo,
uint8_t **converted_input_samples,
const int frame_size)
{
int error;
if ((error = av_audio_fifo_realloc(fifo, av_audio_fifo_size(fifo) + frame_size)) < 0) {
fprintf(stderr, "Could not reallocate FIFO\n");
return error;
}
if (av_audio_fifo_write(fifo, (void **)converted_input_samples,
frame_size) < frame_size) {
fprintf(stderr, "Could not write data to FIFO\n");
return AVERROR_EXIT;
}
return 0;
}
static int read_decode_convert_and_store(AVAudioFifo *fifo,
AVFormatContext *input_format_context,
AVCodecContext *input_codec_context,
AVCodecContext *output_codec_context,
SwrContext *resampler_context,
int *finished)
{
AVFrame *input_frame = NULL;
uint8_t **converted_input_samples = NULL;
int data_present;
int ret = AVERROR_EXIT;
if (init_input_frame(&input_frame))
goto cleanup;
if (decode_audio_frame(input_frame, input_format_context,
input_codec_context, &data_present, finished))
goto cleanup;
if (*finished) {
ret = 0;
goto cleanup;
}
if (data_present) {
/* Initialize the temporary storage for the converted input samples. */
if (init_converted_samples(&converted_input_samples, output_codec_context,
input_frame->nb_samples))
goto cleanup;
/* figure out how many samples are required for target sample_rate incl
* any items left in the swr buffer
*/
int output_nb_samples = av_rescale_rnd(
swr_get_delay(resampler_context, input_codec_context->sample_rate) + input_frame->nb_samples,
output_codec_context->sample_rate,
input_codec_context->sample_rate,
AV_ROUND_UP);
/* ignore, just to ensure we've got enough buffer alloc'd for conversion buffer */
av_assert1(input_frame->nb_samples > output_nb_samples);
/* Convert the input samples to the desired output sample format, via swr_convert().
*/
if (convert_samples((const uint8_t**)input_frame->extended_data, input_frame->nb_samples,
converted_input_samples, output_nb_samples,
resampler_context))
goto cleanup;
/* Add the converted input samples to the FIFO buffer for later processing. */
if (add_samples_to_fifo(fifo, converted_input_samples,
output_nb_samples))
goto cleanup;
ret = 0;
}
ret = 0;
cleanup:
if (converted_input_samples) {
av_freep(&converted_input_samples[0]);
free(converted_input_samples);
}
av_frame_free(&input_frame);
return ret;
}
static int init_output_frame(AVFrame **frame,
AVCodecContext *output_codec_context,
int frame_size)
{
int error;
if (!(*frame = av_frame_alloc())) {
fprintf(stderr, "Could not allocate output frame\n");
return AVERROR_EXIT;
}
/* Set the frame's parameters, especially its size and format.
* av_frame_get_buffer needs this to allocate memory for the
* audio samples of the frame.
* Default channel layouts based on the number of channels
* are assumed for simplicity. */
(*frame)->nb_samples = frame_size;
(*frame)->channel_layout = output_codec_context->channel_layout;
(*frame)->format = output_codec_context->sample_fmt;
(*frame)->sample_rate = output_codec_context->sample_rate;
/* Allocate the samples of the created frame. This call will make
* sure that the audio frame can hold as many samples as specified. */
if ((error = av_frame_get_buffer(*frame, 0)) < 0) {
fprintf(stderr, "Could not allocate output frame samples (error '%s')\n",
av_err2str(error));
av_frame_free(frame);
return error;
}
return 0;
}
/* Global timestamp for the audio frames. */
static int64_t pts = 0;
/**
* Encode one frame worth of audio to the output file.
*/
static int encode_audio_frame(AVFrame *frame,
AVFormatContext *output_format_context,
AVCodecContext *output_codec_context,
int *data_present)
{
AVPacket *output_packet;
int error;
error = init_packet(&output_packet);
if (error < 0)
return error;
/* Set a timestamp based on the sample rate for the container. */
if (frame) {
frame->pts = pts;
pts += frame->nb_samples;
}
*data_present = 0;
error = avcodec_send_frame(output_codec_context, frame);
if (error < 0 && error != AVERROR_EOF) {
fprintf(stderr, "Could not send packet for encoding (error '%s')\n",
av_err2str(error));
goto cleanup;
}
error = avcodec_receive_packet(output_codec_context, output_packet);
if (error == AVERROR(EAGAIN)) {
error = 0;
goto cleanup;
} else if (error == AVERROR_EOF) {
error = 0;
goto cleanup;
} else if (error < 0) {
fprintf(stderr, "Could not encode frame (error '%s')\n",
av_err2str(error));
goto cleanup;
} else {
*data_present = 1;
}
/* Write one audio frame from the temporary packet to the output file. */
if (*data_present &&
(error = av_write_frame(output_format_context, output_packet)) < 0) {
fprintf(stderr, "Could not write frame (error '%s')\n",
av_err2str(error));
goto cleanup;
}
cleanup:
av_packet_free(&output_packet);
return error;
}
/**
* Load one audio frame from the FIFO buffer, encode and write it to the
* output file.
*/
static int load_encode_and_write(AVAudioFifo *fifo,
AVFormatContext *output_format_context,
AVCodecContext *output_codec_context)
{
AVFrame *output_frame;
/* Use the maximum number of possible samples per frame.
* If there is less than the maximum possible frame size in the FIFO
* buffer use this number. Otherwise, use the maximum possible frame size. */
const int frame_size = FFMIN(av_audio_fifo_size(fifo),
output_codec_context->frame_size);
int data_written;
if (init_output_frame(&output_frame, output_codec_context, frame_size))
return AVERROR_EXIT;
/* Read as many samples from the FIFO buffer as required to fill the frame.
* The samples are stored in the frame temporarily. */
if (av_audio_fifo_read(fifo, (void **)output_frame->data, frame_size) < frame_size) {
fprintf(stderr, "Could not read data from FIFO\n");
av_frame_free(&output_frame);
return AVERROR_EXIT;
}
/* Encode one frame worth of audio samples. */
if (encode_audio_frame(output_frame, output_format_context,
output_codec_context, &data_written)) {
av_frame_free(&output_frame);
return AVERROR_EXIT;
}
av_frame_free(&output_frame);
return 0;
}
/**
* Write the trailer of the output file container.
*/
static int write_output_file_trailer(AVFormatContext *output_format_context)
{
int error;
if ((error = av_write_trailer(output_format_context)) < 0) {
fprintf(stderr, "Could not write output file trailer (error '%s')\n",
av_err2str(error));
return error;
}
return 0;
}
int main(int argc, char **argv)
{
AVFormatContext *input_format_context = NULL, *output_format_context = NULL;
AVCodecContext *input_codec_context = NULL, *output_codec_context = NULL;
SwrContext *resample_context = NULL;
AVAudioFifo *fifo = NULL;
int ret = AVERROR_EXIT;
if (argc != 3) {
fprintf(stderr, "Usage: %s <input file> <output file>\n", argv[0]);
exit(1);
}
if (open_input_file(argv[1], &input_format_context,
&input_codec_context))
goto cleanup;
if (open_output_file(argv[2], input_codec_context,
&output_format_context, &output_codec_context))
goto cleanup;
if (init_resampler(input_codec_context, output_codec_context,
&resample_context))
goto cleanup;
if (init_fifo(&fifo, output_codec_context))
goto cleanup;
if (write_output_file_header(output_format_context))
goto cleanup;
while (1) {
/* Use the encoder's desired frame size for processing. */
const int output_frame_size = output_codec_context->frame_size;
int finished = 0;
while (av_audio_fifo_size(fifo) < output_frame_size) {
/* Decode one frame worth of audio samples, convert it to the
* output sample format and put it into the FIFO buffer. */
if (read_decode_convert_and_store(fifo, input_format_context,
input_codec_context,
output_codec_context,
resample_context, &finished))
goto cleanup;
if (finished)
break;
}
while (av_audio_fifo_size(fifo) >= output_frame_size ||
(finished && av_audio_fifo_size(fifo) > 0))
if (load_encode_and_write(fifo, output_format_context,
output_codec_context))
goto cleanup;
if (finished) {
int data_written;
do {
if (encode_audio_frame(NULL, output_format_context,
output_codec_context, &data_written))
goto cleanup;
} while (data_written);
break;
}
}
if (write_output_file_trailer(output_format_context))
goto cleanup;
ret = 0;
cleanup:
if (fifo)
av_audio_fifo_free(fifo);
swr_free(&resample_context);
if (output_codec_context)
avcodec_free_context(&output_codec_context);
if (output_format_context) {
avio_closep(&output_format_context->pb);
avformat_free_context(output_format_context);
}
if (input_codec_context)
avcodec_free_context(&input_codec_context);
if (input_format_context)
avformat_close_input(&input_format_context);
return ret;
}

After going through the ffmpeg/libav mailing list, particularly https://ffmpeg.org/pipermail/libav-user/2017-July/010496.html, I was able to modify the ffmpeg transcode_aac.c example to perform the sample rate conversion.
In the original code, the main loop reads/decode/covert/store in one function before passing the samples to a AVAudioFifo which is used by the encoder.
Some encoders expects a specific number of samples - if you provide less, it appears the encoder pads up to expected and this results in the glitches mentioned in my first attempt.
The key, as per the ffmpeg mailing list, is to buffer / concat the decoded input samples until we have enough samples for at least one frame for the encoder. To do this we split the read/decode from the convert/store with the read/decode data being stored in a new intermediary AVAudioFifo. Once the intermediary fifo has enough samples, they get converted and the output is added to the original fifo.
static int read_decode_and_store(AVAudioFifo *fifo,
AVFormatContext *input_format_context,
AVCodecContext *input_codec_context,
const int audio_stream_idx,
int *finished)
{
AVFrame *input_frame = NULL;
int data_present = 0;
int ret = AVERROR_EXIT;
if (init_input_frame(&input_frame))
goto cleanup;
if (decode_audio_frame(input_frame, input_format_context,
input_codec_context, audio_stream_idx, &data_present, finished))
goto cleanup;
if (*finished) {
ret = 0;
goto cleanup;
}
if (data_present) {
/* Add the converted input samples to the FIFO buffer for later processing. */
if (add_samples_to_fifo(fifo, (uint8_t**)input_frame->extended_data, input_frame->nb_samples))
goto cleanup;
}
ret = 0;
cleanup:
av_frame_free(&input_frame);
return ret;
}
static int load_convert_and_store(AVAudioFifo* output_samples_fifo, const AVFormatContext* output_context, AVCodecContext* output_codec_context, int output_frame_size,
AVAudioFifo* input_samples_fifo, const AVFormatContext* input_context, AVCodecContext* input_codec_context,
SwrContext* resample_context)
{
uint8_t **converted_input_samples = NULL;
int ret = AVERROR_EXIT;
AVFrame *input_frame;
const int frame_size = FFMIN(av_audio_fifo_size(input_samples_fifo),
output_frame_size);
// yes this is init_output_frame
if (init_output_frame(&input_frame, input_codec_context, frame_size))
return AVERROR_EXIT;
if (av_audio_fifo_read(input_samples_fifo, (void **)input_frame->data, frame_size) < frame_size) {
fprintf(stderr, "Could not read data from input samples FIFO");
av_frame_free(&input_frame);
return AVERROR_EXIT;
}
int nb_samples = (output_codec_context->sample_rate == input_codec_context->sample_rate) ?
input_frame->nb_samples :
av_rescale_rnd(swr_get_delay(resample_context, input_codec_context->sample_rate) + input_frame->nb_samples, output_codec_context->sample_rate, input_codec_context->sample_rate, AV_ROUND_UP);
if (init_converted_samples(&converted_input_samples, output_codec_context,
nb_samples))
goto cleanup;
/* **** Modify convert_samples() to return the value from swr_convert() **** */
if ( (nb_samples = convert_samples((const uint8_t**)input_frame->extended_data, input_frame->nb_samples,
converted_input_samples, output_codec_context->frame_size,
resample_context)) < 0)
goto cleanup;
if (add_samples_to_fifo(output_samples_fifo, converted_input_samples, nb_samples))
goto cleanup;
ret = 0;
cleanup:
if (converted_input_samples) {
av_freep(&converted_input_samples[0]);
free(converted_input_samples);
}
av_frame_free(&input_frame);
return ret;
}
int main()
{
...
while (1)
{
const int output_frame_size = output_codec_context->frame_size;
int finished = 0;
/* Re: Resample frame to specified number of samples
* https://ffmpeg.org/pipermail/libav-user/2017-July/010496.html
* Yes, you need to buffer sufficient audio frames to feed to the encoder.
*
* Calculate the number of in samples:
in_nb_samples = av_rescale_rnd(swr_get_delay(swr_ctx, c->sample_rate) +
out_nb_samples,
in_sample_rate, c->sample_rate, AV_ROUND_DOWN);
then allocate buffers to concatenate the in samples until you have enough
to pass to swr_ctx.
*/
while (av_audio_fifo_size(input_samples_fifo) < output_frame_size) {
if (read_decode_and_store(input_samples_fifo,
input_format_context, input_codec_context,
audio_stream_idx,
&finished))
goto cleanup;
if (finished)
break;
}
while (av_audio_fifo_size(input_samples_fifo) >= output_frame_size ||
(finished && av_audio_fifo_size(input_samples_fifo) > 0)) {
/* take all input samples and convert them before handing off to encoder
*/
if (load_convert_and_store(fifo,
output_format_context, output_codec_context, output_frame_size,
input_samples_fifo, input_format_context, input_codec_context,
resample_context))
goto cleanup;
}
}
/* If we have enough samples for the encoder, we encode them.
* At the end of the file, we pass the remaining samples to
* the encoder. */
.... // existing code
}

Related

convert pcm stream data to encoded aac data

I tried to convert pulse-audio pcm stream data to aac encoded data using ffmpeg. But after encoding I get noise-full data, not the correct one. Here I post my code, anyone help me with some ideas.
Initial configuration:
av_register_all();
int error;
if ((error = avio_open(&output_io_context,"out.aac",AVIO_FLAG_WRITE))<0) {
printf("could not open output file\n");
}
if (!(output_format_context = avformat_alloc_context())) {
printf("output_format_context error\n");
}
output_format_context->pb = output_io_context;
if(!(output_format_context->oformat = av_guess_format(NULL, "out.aac", NULL))) {
printf("guess format error\n");
}
codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
if (codec == NULL) {
printf("avcodec_find_encoder: ERROR\n");
}
if (!(stream = avformat_new_stream(output_format_context, NULL))) {
printf("stream create error\n");
}
output_codec_context = avcodec_alloc_context3(codec);
if(!output_codec_context) {
printf("output_codec_context is null\n");
}
output_codec_context->channels = CHANNELS;
output_codec_context->channel_layout = av_get_default_channel_layout(CHANNELS);
output_codec_context->sample_rate = SAMPLE_RATE; //input_codec_context->sample_rate;
output_codec_context->sample_fmt = codec->sample_fmts[0];
output_codec_context->bit_rate = 48000; //OUTPUT_BIT_RATE;
stream->time_base.den = SAMPLE_RATE;//input_codec_context->sample_rate;
stream->time_base.num = 1;
if(output_format_context->oformat->flags & AVFMT_GLOBALHEADER)
output_codec_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
if ((error = avcodec_open2(output_codec_context, codec, NULL)) < 0) {
printf("error");
}
error = avcodec_parameters_from_context(stream->codecpar, output_codec_context);
if (write_output_file_header(output_format_context)) {
printf("write header failure...\n");
}
Data encoding:
AVFrame *output_frame;
int frame_pos = 0, ctx_frame_size = output_codec_context->frame_size;
int size = av_samples_get_buffer_size(NULL, CHANNELS,
output_codec_context->frame_size,output_codec_context->sample_fmt, 1);
if((x = avcodec_fill_audio_frame(output_frame, CHANNELS,
output_codec_context->sample_fmt, data, length, 1)) < 0) {
printf("avcodec_fill_audio_frame error : %s\n", av_err2str(x));
}
int data_written;
if (encode_audio_frame(output_frame, output_format_context,
output_codec_context, &data_written)) {
printf("encode_audio_frame error\n");
}
av_frame_free(&output_frame);
helper_function :
int encode_audio_frame(AVFrame *frame,AVFormatContext *output_format_context,
AVCodecContext *output_codec_context, int *data_present)
{
AVPacket output_packet;
int error;
init_packet(&output_packet);
if (frame) {
frame->pts = pts;
pts += frame->nb_samples;
}
error = avcodec_send_frame(output_codec_context, frame);
if (error == AVERROR_EOF) {
error = 0;
goto cleanup;
} else if (error < 0) {
fprintf(stderr, "Could not send packet for encoding (error '%s')\n",
av_err2str(error));
return error;
}
error = avcodec_receive_packet(output_codec_context, &output_packet);
if (error == AVERROR(EAGAIN)) {
error = 0;
goto cleanup;
} else if (error == AVERROR_EOF) {
error = 0;
goto cleanup;
} else if (error < 0) {
fprintf(stderr, "Could not encode frame (error '%s')\n",
av_err2str(error));
goto cleanup;
} else {
*data_present = 1;
}
if (*data_present &&
(error = av_write_frame(output_format_context, &output_packet)) < 0) {
fprintf(stderr, "Could not write frame (error '%s')\n",
av_err2str(error));
goto cleanup;
}
cleanup:
av_packet_unref(&output_packet);
return error;
}
Do we need to fill AVFrame with sizeof(av_samples_get_buffer_size) or context->frame_size ?
TYIA :) !!

FFMPEG AAC encoder issue

I am trying to capture and encode audio data, I am encoding audio using
FFMPEG AAC and to capture PCM data I used ALSA, Capturing part is working in my case, However, AAC encoder is not working.
I am trying to play test.aac file using
ffplay test.aac
but it contains lots of noise.
Attaching code for aac encoder :
#include "AudioEncoder.h"
void* AudioEncoder::run(void *ctx)
{
return ((AudioEncoder *)ctx)->execute();
}
static int frameCount = 0;
void* AudioEncoder::execute(void)
{
float buf[size], *temp;
int totalSize = 0;
int fd = open("in.pcm", O_CREAT| O_RDWR, 0666);
int frameSize = 128 * snd_pcm_format_width(SND_PCM_FORMAT_FLOAT) / 8 * 2;
av_new_packet(&pkt,size);
cout << size << endl;
while (!Main::stopThread)
{
temp = (Main::fbAudio)->dequeue();
memcpy(buf + totalSize, temp, frameSize);
write(fd, temp, frameSize); // Can play in.pcm with no noise in it.
totalSize += frameSize;
delete temp;
if (totalSize >= size)
{
totalSize = 0;
//frame_buf = (uint8_t *) buf;
pFrame->data[0] = (uint8_t *)buf; //PCM Data
pFrame->pts=frameCount;
frameCount++;
got_frame=0;
//Encode
ret = avcodec_encode_audio2(pCodecCtx, &pkt,pFrame, &got_frame);
if(ret < 0){
cerr << "Failed to encode!\n";
return NULL;
}
if (got_frame==1){
printf("Succeed to encode 1 frame! \tsize:%5d\n",pkt.size);
pkt.stream_index = audio_st->index;
#ifdef DUMP_TEST
ret = av_write_frame(pFormatCtx, &pkt);
#endif
av_free_packet(&pkt);
}
//memset(buf, 0, sizeof(float)*size);
}
//delete temp;
//if (buf.size() >= m_audio_output_decoder_ctx->frame_size)
/* encode the audio*/
}
close(fd);
Main::stopThread = true;
return NULL;
}
int AudioEncoder::flush_encoder(AVFormatContext *fmt_ctx,unsigned int stream_index){
int ret;
int got_frame;
AVPacket enc_pkt;
if (!(fmt_ctx->streams[stream_index]->codec->codec->capabilities &
CODEC_CAP_DELAY))
return 0;
while (1) {
enc_pkt.data = NULL;
enc_pkt.size = 0;
av_init_packet(&enc_pkt);
ret = avcodec_encode_audio2 (fmt_ctx->streams[stream_index]->codec, &enc_pkt,
NULL, &got_frame);
av_frame_free(NULL);
if (ret < 0)
break;
if (!got_frame){
ret=0;
break;
}
printf("Flush Encoder: Succeed to encode 1 frame!\tsize:%5d\n",enc_pkt.size);
/* mux encoded frame */
#ifdef DUMP_TEST
ret = av_write_frame(fmt_ctx, &enc_pkt);
if (ret < 0)
break;
#endif
}
return ret;
}
void AudioEncoder::start(void)
{
pthread_t encoder;
pthread_create(&encoder, NULL, &AudioEncoder::run, this);
}
AudioEncoder::AudioEncoder() : out_file("test.aac")
{
got_frame = 0;
ret = 0;
size = 0;
av_register_all();
avcodec_register_all();
//Method 1.
pFormatCtx = avformat_alloc_context();
fmt = av_guess_format(NULL, out_file, NULL);
pFormatCtx->oformat = fmt;
#ifdef DUMP_TEST
if (avio_open(&pFormatCtx->pb,out_file, AVIO_FLAG_READ_WRITE) < 0){
cerr << "Failed to open output file!\n";
return;
}
#endif
audio_st = avformat_new_stream(pFormatCtx, 0);
if (audio_st==NULL){
return;
}
pCodecCtx = audio_st->codec;
pCodecCtx->codec_id = fmt->audio_codec;
pCodecCtx->codec_type = AVMEDIA_TYPE_AUDIO;
pCodecCtx->sample_fmt = AV_SAMPLE_FMT_FLTP;
pCodecCtx->sample_rate= 8000;
pCodecCtx->channel_layout=AV_CH_LAYOUT_STEREO;
pCodecCtx->channels = av_get_channel_layout_nb_channels(pCodecCtx->channel_layout);
// pCodecCtx->bit_rate = 64000;
#ifdef DUMP_TEST
//Show some information
av_dump_format(pFormatCtx, 0, out_file, 1);
#endif
pCodec = avcodec_find_encoder(pCodecCtx->codec_id);
if (!pCodec){
printf("Can not find encoder!\n");
return;
}
if (avcodec_open2(pCodecCtx, pCodec,NULL) < 0){
printf("Failed to open encoder!\n");
return;
}
pFrame = av_frame_alloc();
pFrame->nb_samples= pCodecCtx->frame_size;
pFrame->format= pCodecCtx->sample_fmt;
size = av_samples_get_buffer_size(NULL, pCodecCtx->channels,pCodecCtx->frame_size,pCodecCtx->sample_fmt, 1);
frame_buf = (uint8_t *)av_malloc(size);
avcodec_fill_audio_frame(pFrame, pCodecCtx->channels, pCodecCtx->sample_fmt,(const uint8_t*)frame_buf, size, 1);
//Write Header
#ifdef DUMP_TEST
avformat_write_header(pFormatCtx,NULL);
#endif
}
AudioEncoder::~AudioEncoder()
{
//Flush Encoder
ret = flush_encoder(pFormatCtx,0);
if (ret < 0) {
cerr << "Flushing encoder failed\n";
return;
}
#ifdef DUMP_TEST
//Write Trailer
av_write_trailer(pFormatCtx);
#endif
//Clean
if (audio_st){
avcodec_close(audio_st->codec);
av_free(pFrame);
av_free(frame_buf);
}
avio_close(pFormatCtx->pb);
avformat_free_context(pFormatCtx);
}
Here, please ignore DUMP_TEST flag, I already enabled it.
Can some one tell me what is issue ?
Thanks,
Harshil
I am able to resolve this issue, by correctly passing buffer from ALSA to AAC encoder.
Here AAC expects buffer size of 4096 bytes, but from deque I am passing 1024 bytes which causes issue, also I updated audio channels to MONO, in place of STEREO. Attaching my working code snippet for more information:
#include "AudioEncoder.h"
void* AudioEncoder::run(void *ctx)
{
return ((AudioEncoder *)ctx)->execute();
}
static int frameCount = 0;
void* AudioEncoder::execute(void)
{
float *temp;
#ifdef DUMP_TEST
int fd = open("in.pcm", O_CREAT| O_RDWR, 0666);
#endif
int frameSize = 1024 * snd_pcm_format_width(SND_PCM_FORMAT_FLOAT) / 8 * 1;
av_new_packet(&pkt,size);
while (!Main::stopThread)
{
temp = (Main::fbAudio)->dequeue();
frame_buf = (uint8_t *) temp;
pFrame->data[0] = frame_buf;
pFrame->pts=frameCount*100;
frameCount++;
got_frame=0;
//Encode
ret = avcodec_encode_audio2(pCodecCtx, &pkt,pFrame, &got_frame);
if(ret < 0){
cerr << "Failed to encode!\n";
return NULL;
}
if (got_frame==1){
cout << "Encoded frame\n";
pkt.stream_index = audio_st->index;
#ifdef DUMP_TEST
write(fd, temp, frameSize);
ret = av_interleaved_write_frame(pFormatCtx, &pkt);
#endif
av_free_packet(&pkt);
}
delete temp;
}
#ifdef DUMP_TEST
close(fd);
#endif
Main::stopThread = true;
return NULL;
}
int AudioEncoder::flush_encoder(AVFormatContext *fmt_ctx,unsigned int stream_index){
int ret;
int got_frame;
AVPacket enc_pkt;
if (!(fmt_ctx->streams[stream_index]->codec->codec->capabilities &
CODEC_CAP_DELAY))
return 0;
while (1) {
enc_pkt.data = NULL;
enc_pkt.size = 0;
av_init_packet(&enc_pkt);
ret = avcodec_encode_audio2 (fmt_ctx->streams[stream_index]->codec, &enc_pkt,
NULL, &got_frame);
av_frame_free(NULL);
if (ret < 0)
break;
if (!got_frame){
ret=0;
break;
}
printf("Flush Encoder: Succeed to encode 1 frame!\tsize:%5d\n",enc_pkt.size);
/* mux encoded frame */
#ifdef DUMP_TEST
ret = av_write_frame(fmt_ctx, &enc_pkt);
if (ret < 0)
break;
#endif
}
return ret;
}
void AudioEncoder::start(void)
{
pthread_t encoder;
pthread_create(&encoder, NULL, &AudioEncoder::run, this);
}
AudioEncoder::AudioEncoder() : out_file("test.aac")
{
got_frame = 0;
ret = 0;
size = 0;
av_register_all();
avcodec_register_all();
//Method 1.
pFormatCtx = avformat_alloc_context();
fmt = av_guess_format(NULL, out_file, NULL);
pFormatCtx->oformat = fmt;
#ifdef DUMP_TEST
if (avio_open(&pFormatCtx->pb,out_file, AVIO_FLAG_READ_WRITE) < 0){
cerr << "Failed to open output file!\n";
return;
}
#endif
audio_st = avformat_new_stream(pFormatCtx, 0);
if (audio_st==NULL){
return;
}
pCodecCtx = audio_st->codec;
pCodecCtx->codec_id = fmt->audio_codec;
pCodecCtx->codec_type = AVMEDIA_TYPE_AUDIO;
pCodecCtx->sample_fmt = AV_SAMPLE_FMT_FLTP;
pCodecCtx->sample_rate= 8000;
pCodecCtx->channel_layout=AV_CH_LAYOUT_MONO;
pCodecCtx->channels = av_get_channel_layout_nb_channels(pCodecCtx->channel_layout);
pCodecCtx->bit_rate = 64000;
#ifdef DUMP_TEST
//Show some information
av_dump_format(pFormatCtx, 0, out_file, 1);
#endif
pCodec = avcodec_find_encoder(pCodecCtx->codec_id);
if (!pCodec){
printf("Can not find encoder!\n");
return;
}
if (avcodec_open2(pCodecCtx, pCodec,NULL) < 0){
printf("Failed to open encoder!\n");
return;
}
pFrame = av_frame_alloc();
pFrame->nb_samples= pCodecCtx->frame_size;
pFrame->format= pCodecCtx->sample_fmt;
size = av_samples_get_buffer_size(NULL, pCodecCtx->channels,pCodecCtx->frame_size,pCodecCtx->sample_fmt, 1);
frame_buf = (uint8_t *)av_malloc(size);
avcodec_fill_audio_frame(pFrame, pCodecCtx->channels, pCodecCtx->sample_fmt,(const uint8_t*)frame_buf, size, 1);
//Write Header
#ifdef DUMP_TEST
avformat_write_header(pFormatCtx,NULL);
#endif
}
AudioEncoder::~AudioEncoder()
{
//Flush Encoder
ret = flush_encoder(pFormatCtx,0);
if (ret < 0) {
cerr << "Flushing encoder failed\n";
return;
}
#ifdef DUMP_TEST
//Write Trailer
av_write_trailer(pFormatCtx);
#endif
//Clean
if (audio_st){
avcodec_close(audio_st->codec);
av_free(pFrame);
av_free(frame_buf);
}
avio_close(pFormatCtx->pb);
avformat_free_context(pFormatCtx);
}

Multithread decoding of Video PID of Mpeg2Ts using FFMPEG

I'm working on an app in VC++ to display video frames of a video Pid of mpeg2ts stream using FFMPEG and need to do the same, for other mpeg2stream simultaneously by using multi thread process,my source code is:
int main (int argc, char* argv[])
{
av_register_all();
avformat_network_init();
pFormatCtx = avformat_alloc_context();
if(avformat_open_input(&pFormatCtx,filepath,NULL,NULL)!=0){
printf("Couldn't open input stream.\n");
return -1;
}
if(avformat_find_stream_info(pFormatCtx,NULL)<0){
printf("Couldn't find stream information.\n");
return -1;
}
videoindex=-1;
for(i=0; i<pFormatCtx->nb_streams; i++)
if(pFormatCtx->streams[i]->codec->codec_type==AVMEDIA_TYPE_VIDEO){
videoindex=i;
break;
}
if(videoindex==-1){
printf("Didn't find a video stream.\n");
return -1;
}
pCodecCtx=pFormatCtx->streams[videoindex]->codec;
pCodec=avcodec_find_decoder(pCodecCtx->codec_id);
if(pCodec==NULL){
printf("Codec not found.\n");
return -1;
}
if(avcodec_open2(pCodecCtx, pCodec,NULL)<0){
printf("Could not open codec.\n");
return -1;
}
pFrame=av_frame_alloc();
pFrameYUV=av_frame_alloc();
out_buffer=(uint8_t *)av_malloc(avpicture_get_size(PIX_FMT_YUV420P, pCodecCtx->width, pCodecCtx->height));
avpicture_fill((AVPicture *)pFrameYUV, out_buffer, PIX_FMT_YUV420P, pCodecCtx->width, pCodecCtx->height);
packet=(AVPacket *)av_malloc(sizeof(AVPacket));
//Output Info-----------------------------
printf("--------------- File Information ----------------\n");
av_dump_format(pFormatCtx,0,filepath,0);
printf("-------------------------------------------------\n");
img_convert_ctx = sws_getContext(pCodecCtx->width, pCodecCtx->height, pCodecCtx->pix_fmt,
pCodecCtx->width, pCodecCtx->height, PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL);
#if OUTPUT_YUV420P
fp_yuv=fopen("output.yuv","wb+");
#endif
if(SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER)) {
printf( "Could not initialize SDL - %s\n", SDL_GetError());
return -1;
}
screen_w = pCodecCtx->width;
screen_h = pCodecCtx->height;
//SDL 2.0 Support for multiple windows
screen = SDL_CreateWindow("Simplest ffmpeg player's Window", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED,
screen_w, screen_h, SDL_WINDOW_OPENGL);
if(!screen) {
printf("SDL: could not create window - exiting:%s\n",SDL_GetError());
return -1;
}
sdlRenderer = SDL_CreateRenderer(screen, -1, 0);
//IYUV: Y + U + V (3 planes)
//YV12: Y + V + U (3 planes)
sdlTexture = SDL_CreateTexture(sdlRenderer, SDL_PIXELFORMAT_IYUV, SDL_TEXTUREACCESS_STREAMING,pCodecCtx->width,pCodecCtx->height);
sdlRect.x=0;
sdlRect.y=0;
sdlRect.w=screen_w;
sdlRect.h=screen_h;
//SDL End----------------------
BYTE buffer [4] ;
int nSize = 0 ;
int nByteCnt = 0 ;
int nPreviuosPos = 0 ;
mpgfile = fopen ("D:\\00_Projects\\Farzan II\\SampleData\\Yahsat1996V_N_PID(2101).pes", "rb");
while(av_read_frame(pFormatCtx, packet)>=0 /*&& nSize > 0*/)
{
if(packet->stream_index==videoindex)
{
ret = avcodec_decode_video2(pCodecCtx, pFrame, &got_picture, packet);
if(ret < 0)
{
printf("Decode Error.\n");
return -1;
}
if(got_picture)
{
sws_scale(img_convert_ctx, (const uint8_t* const*)pFrame->data, pFrame->linesize, 0, pCodecCtx->height,
pFrameYUV->data, pFrameYUV->linesize);
#if OUTPUT_YUV420P
y_size=pCodecCtx->width*pCodecCtx->height;
fwrite(pFrameYUV->data[0],1,y_size,fp_yuv); //Y
fwrite(pFrameYUV->data[1],1,y_size/4,fp_yuv); //U
fwrite(pFrameYUV->data[2],1,y_size/4,fp_yuv); //V
#endif
//SDL---------------------------
#if 0
SDL_UpdateTexture( sdlTexture, NULL, pFrameYUV->data[0], pFrameYUV->linesize[0] );
#else
SDL_UpdateYUVTexture(sdlTexture, &sdlRect,
pFrameYUV->data[0], pFrameYUV->linesize[0],
pFrameYUV->data[1], pFrameYUV->linesize[1],
pFrameYUV->data[2], pFrameYUV->linesize[2]);
#endif
SDL_RenderClear( sdlRenderer );
SDL_RenderCopy( sdlRenderer, sdlTexture, NULL, &sdlRect);
SDL_RenderPresent( sdlRenderer );
//SDL End-----------------------
//Delay 40ms
SDL_Delay(40);
}
}
av_free_packet(packet);
}
//flush decoder
//FIX: Flush Frames remained in Codec
while (1) {
ret = avcodec_decode_video2(pCodecCtx, pFrame, &got_picture, packet);
if (ret < 0)
break;
if (!got_picture)
break;
sws_scale(img_convert_ctx, (const uint8_t* const*)pFrame->data, pFrame->linesize, 0, pCodecCtx->height,
pFrameYUV->data, pFrameYUV->linesize);
#if OUTPUT_YUV420P
int y_size=pCodecCtx->width*pCodecCtx->height;
fwrite(pFrameYUV->data[0],1,y_size,fp_yuv); //Y
fwrite(pFrameYUV->data[1],1,y_size/4,fp_yuv); //U
fwrite(pFrameYUV->data[2],1,y_size/4,fp_yuv); //V
#endif
//SDL---------------------------
SDL_UpdateTexture( sdlTexture, &sdlRect, pFrameYUV->data[0], pFrameYUV->linesize[0] );
SDL_RenderClear( sdlRenderer );
SDL_RenderCopy( sdlRenderer, sdlTexture, NULL, &sdlRect);
SDL_RenderPresent( sdlRenderer );
//SDL End-----------------------
//Delay 40ms
SDL_Delay(40);
}
sws_freeContext(img_convert_ctx);
#if OUTPUT_YUV420P
fclose(fp_yuv);
#endif
SDL_Quit();
av_frame_free(&pFrameYUV);
av_frame_free(&pFrame);
avcodec_close(pCodecCtx);
avformat_close_input(&pFormatCtx);
return 0;
}
it works well when i call it in One thread but,after calling this function in multi thread ,the error of access violation occurred , is there anyone to guide me to solution?

Output RTSP stream with ffmpeg

I'm attempting to use the ffmpeg libraries to send a video stream from my application to a media server (in this case wowza). I have been able to do the reverse and consume an RTSP stream but I'm having a few issues writing an RTSP stream.
I have found a few examples and attempted to utilise the relevant bits. The code is below. I have simplified it as much as I can. I do only want to send a single H264 bit stream to the wowza server and which it can handle.
I get an "Integer division by zero" exception whenever in the av_interleaved_write_frame function when I try and send a packet. The exception looks like it's related to the packet timestamps not being set correctly. I've tried different values and can get past the exception by setting some contrived values but then the write call fails.
#include <iostream>
#include <fstream>
#include <sstream>
#include <cstring>
#include "stdafx.h"
#include "windows.h"
extern "C"
{
#include <libavcodec\avcodec.h>
#include <libavformat\avformat.h>
#include <libavformat\avio.h>
#include <libswscale\swscale.h>
}
using namespace std;
static int video_is_eof;
#define STREAM_DURATION 50.0
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */
#define VIDEO_CODEC_ID CODEC_ID_H264
static int sws_flags = SWS_BICUBIC;
/* video output */
static AVFrame *frame;
static AVPicture src_picture, dst_picture;
static int frame_count;
static int write_frame(AVFormatContext *fmt_ctx, const AVRational *time_base, AVStream *st, AVPacket *pkt)
{
/* rescale output packet timestamp values from codec to stream timebase */
pkt->pts = av_rescale_q_rnd(pkt->pts, *time_base, st->time_base, AVRounding(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
pkt->dts = av_rescale_q_rnd(pkt->dts, *time_base, st->time_base, AVRounding(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
pkt->duration = av_rescale_q(pkt->duration, *time_base, st->time_base);
pkt->stream_index = st->index;
// Exception occurs here.
return av_interleaved_write_frame(fmt_ctx, pkt);
}
/* Add an output stream. */
static AVStream *add_stream(AVFormatContext *oc, AVCodec **codec, enum AVCodecID codec_id)
{
AVCodecContext *c;
AVStream *st;
/* find the encoder */
*codec = avcodec_find_encoder(codec_id);
if (!(*codec)) {
fprintf(stderr, "Could not find encoder for '%s'\n", avcodec_get_name(codec_id));
exit(1);
}
st = avformat_new_stream(oc, *codec);
if (!st) {
fprintf(stderr, "Could not allocate stream\n");
exit(1);
}
st->id = oc->nb_streams - 1;
c = st->codec;
c->codec_id = codec_id;
c->bit_rate = 400000;
c->width = 352;
c->height = 288;
c->time_base.den = STREAM_FRAME_RATE;
c->time_base.num = 1;
c->gop_size = 12; /* emit one intra frame every twelve frames at most */
c->pix_fmt = STREAM_PIX_FMT;
return st;
}
static void open_video(AVFormatContext *oc, AVCodec *codec, AVStream *st)
{
int ret;
AVCodecContext *c = st->codec;
/* open the codec */
ret = avcodec_open2(c, codec, NULL);
if (ret < 0) {
fprintf(stderr, "Could not open video codec: ");
exit(1);
}
/* allocate and init a re-usable frame */
frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
frame->format = c->pix_fmt;
frame->width = c->width;
frame->height = c->height;
/* Allocate the encoded raw picture. */
ret = avpicture_alloc(&dst_picture, c->pix_fmt, c->width, c->height);
if (ret < 0) {
fprintf(stderr, "Could not allocate picture: ");
exit(1);
}
/* copy data and linesize picture pointers to frame */
*((AVPicture *)frame) = dst_picture;
}
/* Prepare a dummy image. */
static void fill_yuv_image(AVPicture *pict, int frame_index, int width, int height)
{
int x, y, i;
i = frame_index;
/* Y */
for (y = 0; y < height; y++)
for (x = 0; x < width; x++)
pict->data[0][y * pict->linesize[0] + x] = x + y + i * 3;
/* Cb and Cr */
for (y = 0; y < height / 2; y++) {
for (x = 0; x < width / 2; x++) {
pict->data[1][y * pict->linesize[1] + x] = 128 + y + i * 2;
pict->data[2][y * pict->linesize[2] + x] = 64 + x + i * 5;
}
}
}
static void write_video_frame(AVFormatContext *oc, AVStream *st, int flush)
{
int ret;
AVCodecContext *c = st->codec;
if (!flush) {
fill_yuv_image(&dst_picture, frame_count, c->width, c->height);
}
AVPacket pkt = { 0 };
int got_packet;
av_init_packet(&pkt);
/* encode the image */
frame->pts = frame_count;
ret = avcodec_encode_video2(c, &pkt, flush ? NULL : frame, &got_packet);
if (ret < 0) {
fprintf(stderr, "Error encoding video frame:");
exit(1);
}
/* If size is zero, it means the image was buffered. */
if (got_packet) {
ret = write_frame(oc, &c->time_base, st, &pkt);
}
else {
if (flush) {
video_is_eof = 1;
}
ret = 0;
}
if (ret < 0) {
fprintf(stderr, "Error while writing video frame: ");
exit(1);
}
frame_count++;
}
static void close_video(AVFormatContext *oc, AVStream *st)
{
avcodec_close(st->codec);
av_free(src_picture.data[0]);
av_free(dst_picture.data[0]);
av_frame_free(&frame);
}
int _tmain(int argc, _TCHAR* argv[])
{
printf("starting...\n");
const char *filename = "rtsp://test:password#192.168.33.19:1935/ffmpeg/0";
AVOutputFormat *fmt;
AVFormatContext *oc;
AVStream *video_st;
AVCodec *video_codec;
double video_time;
int flush, ret;
/* Initialize libavcodec, and register all codecs and formats. */
av_register_all();
avformat_network_init();
AVOutputFormat* oFmt = av_oformat_next(NULL);
while (oFmt) {
if (oFmt->video_codec == VIDEO_CODEC_ID) {
break;
}
oFmt = av_oformat_next(oFmt);
}
if (!oFmt) {
printf("Could not find the required output format.\n");
exit(1);
}
/* allocate the output media context */
avformat_alloc_output_context2(&oc, oFmt, "rtsp", filename);
if (!oc) {
printf("Could not set the output media context.\n");
exit(1);
}
fmt = oc->oformat;
if (!fmt) {
printf("Could not create the output format.\n");
exit(1);
}
video_st = NULL;
cout << "Codec = " << avcodec_get_name(fmt->video_codec) << endl;
if (fmt->video_codec != AV_CODEC_ID_NONE)
{
video_st = add_stream(oc, &video_codec, fmt->video_codec);
}
/* Now that all the parameters are set, we can open the video codec and allocate the necessary encode buffers. */
if (video_st) {
open_video(oc, video_codec, video_st);
}
av_dump_format(oc, 0, filename, 1);
char errorBuff[80];
if (!(fmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&oc->pb, filename, AVIO_FLAG_WRITE);
if (ret < 0) {
fprintf(stderr, "Could not open outfile '%s': %s", filename, av_make_error_string(errorBuff, 80, ret));
return 1;
}
}
flush = 0;
while (video_st && !video_is_eof) {
/* Compute current video time. */
video_time = (video_st && !video_is_eof) ? video_st->pts.val * av_q2d(video_st->time_base) : INFINITY;
if (!flush && (!video_st || video_time >= STREAM_DURATION)) {
flush = 1;
}
if (video_st && !video_is_eof) {
write_video_frame(oc, video_st, flush);
}
}
if (video_st) {
close_video(oc, video_st);
}
if ((fmt->flags & AVFMT_NOFILE)) {
avio_close(oc->pb);
}
avformat_free_context(oc);
printf("finished.\n");
getchar();
return 0;
}
Does anyone have any insights about how the packet timestamps can be successfully set?
I solved the integer division by zero by building ffmpeg on my Windows instance and debugging the av_interleaved_write_frame call. Turns out it was the pts not being set on the video stream object that was causing the exception.
Adding the line below to the while loop in the main function fixed the problem:
video_st->pts.val += av_rescale_q(1, video_st->codec->time_base, video_st->time_base);
Here's a sample that works to get a H264 encoded dummy stream to a Wowza server via ffmpeg's RTSP pipeline.
// Roughly based on: https://ffmpeg.org/doxygen/trunk/muxing_8c-source.html
#include <chrono>
#include <thread>
#include <tchar.h>
extern "C"
{
#include <libavcodec\avcodec.h>
#include <libavformat\avformat.h>
#include <libavformat\avio.h>
#include <libswscale\swscale.h>
#include <libavutil\time.h>
}
#pragma comment(lib,"libavformat/libavformat.a")
#pragma comment(lib,"libavcodec/libavcodec.a")
#pragma comment(lib,"libavutil/libavutil.a")
#pragma comment(lib,"libswscale/libswscale.a")
#pragma comment(lib,"x264.lib")
#pragma comment(lib,"libswresample/libswresample.a")
using namespace std;
static int video_is_eof;
#define STREAM_DURATION 20
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */ //AV_PIX_FMT_NV12;
#define VIDEO_CODEC_ID CODEC_ID_H264
/* video output */
static AVFrame *frame;
static AVPicture src_picture, dst_picture;
/* Add an output stream. */
static AVStream *add_stream(AVFormatContext *oc, AVCodec **codec, enum AVCodecID codec_id)
{
AVCodecContext *c;
AVStream *st;
/* find the encoder */
*codec = avcodec_find_encoder(codec_id);
if (!(*codec)) {
av_log(NULL, AV_LOG_ERROR, "Could not find encoder for '%s'.\n", avcodec_get_name(codec_id));
}
else {
st = avformat_new_stream(oc, *codec);
if (!st) {
av_log(NULL, AV_LOG_ERROR, "Could not allocate stream.\n");
}
else {
st->id = oc->nb_streams - 1;
st->time_base.den = st->pts.den = 90000;
st->time_base.num = st->pts.num = 1;
c = st->codec;
c->codec_id = codec_id;
c->bit_rate = 400000;
c->width = 352;
c->height = 288;
c->time_base.den = STREAM_FRAME_RATE;
c->time_base.num = 1;
c->gop_size = 12; /* emit one intra frame every twelve frames at most */
c->pix_fmt = STREAM_PIX_FMT;
}
}
return st;
}
static int open_video(AVFormatContext *oc, AVCodec *codec, AVStream *st)
{
int ret;
AVCodecContext *c = st->codec;
/* open the codec */
ret = avcodec_open2(c, codec, NULL);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Could not open video codec.\n", avcodec_get_name(c->codec_id));
}
else {
/* allocate and init a re-usable frame */
frame = av_frame_alloc();
if (!frame) {
av_log(NULL, AV_LOG_ERROR, "Could not allocate video frame.\n");
ret = -1;
}
else {
frame->format = c->pix_fmt;
frame->width = c->width;
frame->height = c->height;
/* Allocate the encoded raw picture. */
ret = avpicture_alloc(&dst_picture, c->pix_fmt, c->width, c->height);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Could not allocate picture.\n");
}
else {
/* copy data and linesize picture pointers to frame */
*((AVPicture *)frame) = dst_picture;
}
}
}
return ret;
}
/* Prepare a dummy image. */
static void fill_yuv_image(AVPicture *pict, int frame_index, int width, int height)
{
int x, y, i;
i = frame_index;
/* Y */
for (y = 0; y < height; y++)
for (x = 0; x < width; x++)
pict->data[0][y * pict->linesize[0] + x] = x + y + i * 3;
/* Cb and Cr */
for (y = 0; y < height / 2; y++) {
for (x = 0; x < width / 2; x++) {
pict->data[1][y * pict->linesize[1] + x] = 128 + y + i * 2;
pict->data[2][y * pict->linesize[2] + x] = 64 + x + i * 5;
}
}
}
static int write_video_frame(AVFormatContext *oc, AVStream *st, int frameCount)
{
int ret = 0;
AVCodecContext *c = st->codec;
fill_yuv_image(&dst_picture, frameCount, c->width, c->height);
AVPacket pkt = { 0 };
int got_packet;
av_init_packet(&pkt);
/* encode the image */
frame->pts = frameCount;
ret = avcodec_encode_video2(c, &pkt, frame, &got_packet);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Error encoding video frame.\n");
}
else {
if (got_packet) {
pkt.stream_index = st->index;
pkt.pts = av_rescale_q_rnd(pkt.pts, c->time_base, st->time_base, AVRounding(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
ret = av_write_frame(oc, &pkt);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Error while writing video frame.\n");
}
}
}
return ret;
}
int _tmain(int argc, _TCHAR* argv[])
{
printf("starting...\n");
const char *url = "rtsp://test:password#192.168.33.19:1935/ffmpeg/0";
//const char *url = "rtsp://192.168.33.19:1935/ffmpeg/0";
AVFormatContext *outContext;
AVStream *video_st;
AVCodec *video_codec;
int ret = 0, frameCount = 0;
av_log_set_level(AV_LOG_DEBUG);
//av_log_set_level(AV_LOG_TRACE);
av_register_all();
avformat_network_init();
avformat_alloc_output_context2(&outContext, NULL, "rtsp", url);
if (!outContext) {
av_log(NULL, AV_LOG_FATAL, "Could not allocate an output context for '%s'.\n", url);
goto end;
}
if (!outContext->oformat) {
av_log(NULL, AV_LOG_FATAL, "Could not create the output format for '%s'.\n", url);
goto end;
}
video_st = add_stream(outContext, &video_codec, VIDEO_CODEC_ID);
/* Now that all the parameters are set, we can open the video codec and allocate the necessary encode buffers. */
if (video_st) {
av_log(NULL, AV_LOG_DEBUG, "Video stream codec %s.\n ", avcodec_get_name(video_st->codec->codec_id));
ret = open_video(outContext, video_codec, video_st);
if (ret < 0) {
av_log(NULL, AV_LOG_FATAL, "Open video stream failed.\n");
goto end;
}
}
else {
av_log(NULL, AV_LOG_FATAL, "Add video stream for the codec '%s' failed.\n", avcodec_get_name(VIDEO_CODEC_ID));
goto end;
}
av_dump_format(outContext, 0, url, 1);
ret = avformat_write_header(outContext, NULL);
if (ret != 0) {
av_log(NULL, AV_LOG_ERROR, "Failed to connect to RTSP server for '%s'.\n", url);
goto end;
}
printf("Press any key to start streaming...\n");
getchar();
auto startSend = std::chrono::system_clock::now();
while (video_st) {
frameCount++;
auto startFrame = std::chrono::system_clock::now();
ret = write_video_frame(outContext, video_st, frameCount);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Write video frame failed.\n", url);
goto end;
}
auto streamDuration = std::chrono::duration_cast<chrono::milliseconds>(std::chrono::system_clock::now() - startSend).count();
printf("Elapsed time %ldms, video stream pts %ld.\n", streamDuration, video_st->pts.val);
if (streamDuration / 1000.0 > STREAM_DURATION) {
break;
}
else {
auto frameDuration = std::chrono::duration_cast<chrono::milliseconds>(std::chrono::system_clock::now() - startFrame).count();
std::this_thread::sleep_for(std::chrono::milliseconds((long)(1000.0 / STREAM_FRAME_RATE - frameDuration)));
}
}
if (video_st) {
avcodec_close(video_st->codec);
av_free(src_picture.data[0]);
av_free(dst_picture.data[0]);
av_frame_free(&frame);
}
avformat_free_context(outContext);
end:
printf("finished.\n");
getchar();
return 0;
}

encoding direcshow frame buffers by using libavcodec

I am trying to encode a stream buffer of frames grabbed by ISampleGrabber(directshow) by using libavcodec. After encoding those frame I am writing it into a file. But after completion file contains only green frames.
hers is code for grabbing frames and encoding it...
void DSGrabberCallback::initFFMpeg(){
const char* filename="G:/test1.mpg";
avcodec_register_all();
printf("Encode video file %s\n", filename);
AVCodecID codec_id=AV_CODEC_ID_MPEG2VIDEO;
codec = avcodec_find_encoder(codec_id);
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate video codec context\n");
}
c->bit_rate = 4000000;
c->width = 320;
c->height = 240;
AVRational test;
test.den=25;
test.num=1;
c->time_base= test;
c->gop_size = 10;
//c->max_b_frames=1;
c->pix_fmt = AV_PIX_FMT_YUV420P;
if(codec_id == AV_CODEC_ID_H264)
av_opt_set(c->priv_data, "preset", "slow", 0);
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
}
f = fopen(filename, "wb");
if (!f) {
fprintf(stderr, "Could not open %s\n", filename);
}
picture = alloc_picture(c->pix_fmt, c->width, c->height);
/*picture->format = c->pix_fmt;
picture->width = c->width;
picture->height = c->height;*/
av_init_packet(&pkt);
}
void DSGrabberCallback::encodeFrame(unsigned char *frame,ULONG size){
std::cout<<"called.....";
pkt.data = NULL;
pkt.size = 0;
picture->data[0]=frame;
fflush(stdout);
picture->pts=counter;
ret = avcodec_encode_video2(c, &pkt, picture, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding frame\n");
}
if (got_output) {
printf("Write frame %3d (size=%5d)\n", counter, pkt.size);
fwrite(pkt.data, 1, pkt.size, f);
av_free_packet(&pkt);
}
}
STDMETHODIMP DSGrabberCallback::SampleCB(double time, IMediaSample* sample)
{
BYTE* data = NULL;
ULONG length = 0;
m_bytes=NULL;
counter=counter+1;
if(FAILED(sample->GetPointer(&data)))
{
return E_FAIL;
}
length = sample->GetActualDataLength();
if(length == 0)
{
return S_OK;
}
if(!m_bytes || m_bytesLength < length)
{
if(m_bytes)
{
delete[] m_bytes;
}
m_bytes = new unsigned char[length];
m_bytesLength = length;
}
if(true)
{
for(size_t row = 0 ; row < 480 ; row++)
{
memcpy((m_bytes + row * 640 * 2), data + (480 - 1 - row) * 640 * 2,
640 * 2);
}
}
std::cout<<"hiiiiiiiiiiiiiiiiiiiiiiii";
// memcpy(m_bytes, data, length);
// std::cout<<"called............... "<<m_bytes<<"\n";
if(counter<500){
encodeFrame(m_bytes,length);
}else{
fwrite(endcode, 1, sizeof(endcode), f);
fclose(f);
avcodec_close(c);
av_free(c);
av_freep(&picture->data[0]);
avcodec_free_frame(&picture);
printf("\n");
exit(1);
}
//rtp.sendRTP(data,length);
//sample->Release();
//printf("Sample received: %p %u\n", data, length);
return S_OK;
}
can anyone tell me where is the problem.
Now working fine. Actually I forgot to convert the image buffer into YUV420P format. I have added some code for scaling the buffer into YUV format and everything is fine now. Thank you Wimmel and Roman R.

Resources