Output RTSP stream with ffmpeg

Output RTSP stream with ffmpeg - visual-c++

I'm attempting to use the ffmpeg libraries to send a video stream from my application to a media server (in this case wowza). I have been able to do the reverse and consume an RTSP stream but I'm having a few issues writing an RTSP stream.
I have found a few examples and attempted to utilise the relevant bits. The code is below. I have simplified it as much as I can. I do only want to send a single H264 bit stream to the wowza server and which it can handle.
I get an "Integer division by zero" exception whenever in the av_interleaved_write_frame function when I try and send a packet. The exception looks like it's related to the packet timestamps not being set correctly. I've tried different values and can get past the exception by setting some contrived values but then the write call fails.
#include <iostream>
#include <fstream>
#include <sstream>
#include <cstring>
#include "stdafx.h"
#include "windows.h"
extern "C"
{
#include <libavcodec\avcodec.h>
#include <libavformat\avformat.h>
#include <libavformat\avio.h>
#include <libswscale\swscale.h>
}
using namespace std;
static int video_is_eof;
#define STREAM_DURATION 50.0
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */
#define VIDEO_CODEC_ID CODEC_ID_H264
static int sws_flags = SWS_BICUBIC;
/* video output */
static AVFrame *frame;
static AVPicture src_picture, dst_picture;
static int frame_count;
static int write_frame(AVFormatContext *fmt_ctx, const AVRational *time_base, AVStream *st, AVPacket *pkt)
{
/* rescale output packet timestamp values from codec to stream timebase */
pkt->pts = av_rescale_q_rnd(pkt->pts, *time_base, st->time_base, AVRounding(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
pkt->dts = av_rescale_q_rnd(pkt->dts, *time_base, st->time_base, AVRounding(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
pkt->duration = av_rescale_q(pkt->duration, *time_base, st->time_base);
pkt->stream_index = st->index;
// Exception occurs here.
return av_interleaved_write_frame(fmt_ctx, pkt);
}
/* Add an output stream. */
static AVStream *add_stream(AVFormatContext *oc, AVCodec **codec, enum AVCodecID codec_id)
{
AVCodecContext *c;
AVStream *st;
/* find the encoder */
*codec = avcodec_find_encoder(codec_id);
if (!(*codec)) {
fprintf(stderr, "Could not find encoder for '%s'\n", avcodec_get_name(codec_id));
exit(1);
}
st = avformat_new_stream(oc, *codec);
if (!st) {
fprintf(stderr, "Could not allocate stream\n");
exit(1);
}
st->id = oc->nb_streams - 1;
c = st->codec;
c->codec_id = codec_id;
c->bit_rate = 400000;
c->width = 352;
c->height = 288;
c->time_base.den = STREAM_FRAME_RATE;
c->time_base.num = 1;
c->gop_size = 12; /* emit one intra frame every twelve frames at most */
c->pix_fmt = STREAM_PIX_FMT;
return st;
}
static void open_video(AVFormatContext *oc, AVCodec *codec, AVStream *st)
{
int ret;
AVCodecContext *c = st->codec;
/* open the codec */
ret = avcodec_open2(c, codec, NULL);
if (ret < 0) {
fprintf(stderr, "Could not open video codec: ");
exit(1);
}
/* allocate and init a re-usable frame */
frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
frame->format = c->pix_fmt;
frame->width = c->width;
frame->height = c->height;
/* Allocate the encoded raw picture. */
ret = avpicture_alloc(&dst_picture, c->pix_fmt, c->width, c->height);
if (ret < 0) {
fprintf(stderr, "Could not allocate picture: ");
exit(1);
}
/* copy data and linesize picture pointers to frame */
*((AVPicture *)frame) = dst_picture;
}
/* Prepare a dummy image. */
static void fill_yuv_image(AVPicture *pict, int frame_index, int width, int height)
{
int x, y, i;
i = frame_index;
/* Y */
for (y = 0; y < height; y++)
for (x = 0; x < width; x++)
pict->data[0][y * pict->linesize[0] + x] = x + y + i * 3;
/* Cb and Cr */
for (y = 0; y < height / 2; y++) {
for (x = 0; x < width / 2; x++) {
pict->data[1][y * pict->linesize[1] + x] = 128 + y + i * 2;
pict->data[2][y * pict->linesize[2] + x] = 64 + x + i * 5;
}
}
}
static void write_video_frame(AVFormatContext *oc, AVStream *st, int flush)
{
int ret;
AVCodecContext *c = st->codec;
if (!flush) {
fill_yuv_image(&dst_picture, frame_count, c->width, c->height);
}
AVPacket pkt = { 0 };
int got_packet;
av_init_packet(&pkt);
/* encode the image */
frame->pts = frame_count;
ret = avcodec_encode_video2(c, &pkt, flush ? NULL : frame, &got_packet);
if (ret < 0) {
fprintf(stderr, "Error encoding video frame:");
exit(1);
}
/* If size is zero, it means the image was buffered. */
if (got_packet) {
ret = write_frame(oc, &c->time_base, st, &pkt);
}
else {
if (flush) {
video_is_eof = 1;
}
ret = 0;
}
if (ret < 0) {
fprintf(stderr, "Error while writing video frame: ");
exit(1);
}
frame_count++;
}
static void close_video(AVFormatContext *oc, AVStream *st)
{
avcodec_close(st->codec);
av_free(src_picture.data[0]);
av_free(dst_picture.data[0]);
av_frame_free(&frame);
}
int _tmain(int argc, _TCHAR* argv[])
{
printf("starting...\n");
const char *filename = "rtsp://test:password#192.168.33.19:1935/ffmpeg/0";
AVOutputFormat *fmt;
AVFormatContext *oc;
AVStream *video_st;
AVCodec *video_codec;
double video_time;
int flush, ret;
/* Initialize libavcodec, and register all codecs and formats. */
av_register_all();
avformat_network_init();
AVOutputFormat* oFmt = av_oformat_next(NULL);
while (oFmt) {
if (oFmt->video_codec == VIDEO_CODEC_ID) {
break;
}
oFmt = av_oformat_next(oFmt);
}
if (!oFmt) {
printf("Could not find the required output format.\n");
exit(1);
}
/* allocate the output media context */
avformat_alloc_output_context2(&oc, oFmt, "rtsp", filename);
if (!oc) {
printf("Could not set the output media context.\n");
exit(1);
}
fmt = oc->oformat;
if (!fmt) {
printf("Could not create the output format.\n");
exit(1);
}
video_st = NULL;
cout << "Codec = " << avcodec_get_name(fmt->video_codec) << endl;
if (fmt->video_codec != AV_CODEC_ID_NONE)
{
video_st = add_stream(oc, &video_codec, fmt->video_codec);
}
/* Now that all the parameters are set, we can open the video codec and allocate the necessary encode buffers. */
if (video_st) {
open_video(oc, video_codec, video_st);
}
av_dump_format(oc, 0, filename, 1);
char errorBuff[80];
if (!(fmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&oc->pb, filename, AVIO_FLAG_WRITE);
if (ret < 0) {
fprintf(stderr, "Could not open outfile '%s': %s", filename, av_make_error_string(errorBuff, 80, ret));
return 1;
}
}
flush = 0;
while (video_st && !video_is_eof) {
/* Compute current video time. */
video_time = (video_st && !video_is_eof) ? video_st->pts.val * av_q2d(video_st->time_base) : INFINITY;
if (!flush && (!video_st || video_time >= STREAM_DURATION)) {
flush = 1;
}
if (video_st && !video_is_eof) {
write_video_frame(oc, video_st, flush);
}
}
if (video_st) {
close_video(oc, video_st);
}
if ((fmt->flags & AVFMT_NOFILE)) {
avio_close(oc->pb);
}
avformat_free_context(oc);
printf("finished.\n");
getchar();
return 0;
}
Does anyone have any insights about how the packet timestamps can be successfully set?

I solved the integer division by zero by building ffmpeg on my Windows instance and debugging the av_interleaved_write_frame call. Turns out it was the pts not being set on the video stream object that was causing the exception.
Adding the line below to the while loop in the main function fixed the problem:
video_st->pts.val += av_rescale_q(1, video_st->codec->time_base, video_st->time_base);
Here's a sample that works to get a H264 encoded dummy stream to a Wowza server via ffmpeg's RTSP pipeline.
// Roughly based on: https://ffmpeg.org/doxygen/trunk/muxing_8c-source.html
#include <chrono>
#include <thread>
#include <tchar.h>
extern "C"
{
#include <libavcodec\avcodec.h>
#include <libavformat\avformat.h>
#include <libavformat\avio.h>
#include <libswscale\swscale.h>
#include <libavutil\time.h>
}
#pragma comment(lib,"libavformat/libavformat.a")
#pragma comment(lib,"libavcodec/libavcodec.a")
#pragma comment(lib,"libavutil/libavutil.a")
#pragma comment(lib,"libswscale/libswscale.a")
#pragma comment(lib,"x264.lib")
#pragma comment(lib,"libswresample/libswresample.a")
using namespace std;
static int video_is_eof;
#define STREAM_DURATION 20
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */ //AV_PIX_FMT_NV12;
#define VIDEO_CODEC_ID CODEC_ID_H264
/* video output */
static AVFrame *frame;
static AVPicture src_picture, dst_picture;
/* Add an output stream. */
static AVStream *add_stream(AVFormatContext *oc, AVCodec **codec, enum AVCodecID codec_id)
{
AVCodecContext *c;
AVStream *st;
/* find the encoder */
*codec = avcodec_find_encoder(codec_id);
if (!(*codec)) {
av_log(NULL, AV_LOG_ERROR, "Could not find encoder for '%s'.\n", avcodec_get_name(codec_id));
}
else {
st = avformat_new_stream(oc, *codec);
if (!st) {
av_log(NULL, AV_LOG_ERROR, "Could not allocate stream.\n");
}
else {
st->id = oc->nb_streams - 1;
st->time_base.den = st->pts.den = 90000;
st->time_base.num = st->pts.num = 1;
c = st->codec;
c->codec_id = codec_id;
c->bit_rate = 400000;
c->width = 352;
c->height = 288;
c->time_base.den = STREAM_FRAME_RATE;
c->time_base.num = 1;
c->gop_size = 12; /* emit one intra frame every twelve frames at most */
c->pix_fmt = STREAM_PIX_FMT;
}
}
return st;
}
static int open_video(AVFormatContext *oc, AVCodec *codec, AVStream *st)
{
int ret;
AVCodecContext *c = st->codec;
/* open the codec */
ret = avcodec_open2(c, codec, NULL);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Could not open video codec.\n", avcodec_get_name(c->codec_id));
}
else {
/* allocate and init a re-usable frame */
frame = av_frame_alloc();
if (!frame) {
av_log(NULL, AV_LOG_ERROR, "Could not allocate video frame.\n");
ret = -1;
}
else {
frame->format = c->pix_fmt;
frame->width = c->width;
frame->height = c->height;
/* Allocate the encoded raw picture. */
ret = avpicture_alloc(&dst_picture, c->pix_fmt, c->width, c->height);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Could not allocate picture.\n");
}
else {
/* copy data and linesize picture pointers to frame */
*((AVPicture *)frame) = dst_picture;
}
}
}
return ret;
}
/* Prepare a dummy image. */
static void fill_yuv_image(AVPicture *pict, int frame_index, int width, int height)
{
int x, y, i;
i = frame_index;
/* Y */
for (y = 0; y < height; y++)
for (x = 0; x < width; x++)
pict->data[0][y * pict->linesize[0] + x] = x + y + i * 3;
/* Cb and Cr */
for (y = 0; y < height / 2; y++) {
for (x = 0; x < width / 2; x++) {
pict->data[1][y * pict->linesize[1] + x] = 128 + y + i * 2;
pict->data[2][y * pict->linesize[2] + x] = 64 + x + i * 5;
}
}
}
static int write_video_frame(AVFormatContext *oc, AVStream *st, int frameCount)
{
int ret = 0;
AVCodecContext *c = st->codec;
fill_yuv_image(&dst_picture, frameCount, c->width, c->height);
AVPacket pkt = { 0 };
int got_packet;
av_init_packet(&pkt);
/* encode the image */
frame->pts = frameCount;
ret = avcodec_encode_video2(c, &pkt, frame, &got_packet);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Error encoding video frame.\n");
}
else {
if (got_packet) {
pkt.stream_index = st->index;
pkt.pts = av_rescale_q_rnd(pkt.pts, c->time_base, st->time_base, AVRounding(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
ret = av_write_frame(oc, &pkt);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Error while writing video frame.\n");
}
}
}
return ret;
}
int _tmain(int argc, _TCHAR* argv[])
{
printf("starting...\n");
const char *url = "rtsp://test:password#192.168.33.19:1935/ffmpeg/0";
//const char *url = "rtsp://192.168.33.19:1935/ffmpeg/0";
AVFormatContext *outContext;
AVStream *video_st;
AVCodec *video_codec;
int ret = 0, frameCount = 0;
av_log_set_level(AV_LOG_DEBUG);
//av_log_set_level(AV_LOG_TRACE);
av_register_all();
avformat_network_init();
avformat_alloc_output_context2(&outContext, NULL, "rtsp", url);
if (!outContext) {
av_log(NULL, AV_LOG_FATAL, "Could not allocate an output context for '%s'.\n", url);
goto end;
}
if (!outContext->oformat) {
av_log(NULL, AV_LOG_FATAL, "Could not create the output format for '%s'.\n", url);
goto end;
}
video_st = add_stream(outContext, &video_codec, VIDEO_CODEC_ID);
/* Now that all the parameters are set, we can open the video codec and allocate the necessary encode buffers. */
if (video_st) {
av_log(NULL, AV_LOG_DEBUG, "Video stream codec %s.\n ", avcodec_get_name(video_st->codec->codec_id));
ret = open_video(outContext, video_codec, video_st);
if (ret < 0) {
av_log(NULL, AV_LOG_FATAL, "Open video stream failed.\n");
goto end;
}
}
else {
av_log(NULL, AV_LOG_FATAL, "Add video stream for the codec '%s' failed.\n", avcodec_get_name(VIDEO_CODEC_ID));
goto end;
}
av_dump_format(outContext, 0, url, 1);
ret = avformat_write_header(outContext, NULL);
if (ret != 0) {
av_log(NULL, AV_LOG_ERROR, "Failed to connect to RTSP server for '%s'.\n", url);
goto end;
}
printf("Press any key to start streaming...\n");
getchar();
auto startSend = std::chrono::system_clock::now();
while (video_st) {
frameCount++;
auto startFrame = std::chrono::system_clock::now();
ret = write_video_frame(outContext, video_st, frameCount);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Write video frame failed.\n", url);
goto end;
}
auto streamDuration = std::chrono::duration_cast<chrono::milliseconds>(std::chrono::system_clock::now() - startSend).count();
printf("Elapsed time %ldms, video stream pts %ld.\n", streamDuration, video_st->pts.val);
if (streamDuration / 1000.0 > STREAM_DURATION) {
break;
}
else {
auto frameDuration = std::chrono::duration_cast<chrono::milliseconds>(std::chrono::system_clock::now() - startFrame).count();
std::this_thread::sleep_for(std::chrono::milliseconds((long)(1000.0 / STREAM_FRAME_RATE - frameDuration)));
}
}
if (video_st) {
avcodec_close(video_st->codec);
av_free(src_picture.data[0]);
av_free(dst_picture.data[0]);
av_frame_free(&frame);
}
avformat_free_context(outContext);
end:
printf("finished.\n");
getchar();
return 0;
}

Related

Audio Recording and Playback in C : problem with audio gain

The question essentially is how to correctly apply gain to an audio sample?
I'm programming on FreeBSD and OSS, but manipulate volume in audio sample is probably the same for other OS and applications.
I'm studying others' applications internals like ecasound (in C++) and SoX (in C) but I don't know whats wrong when I read a sample and apply gain to it : it becomes distorted and noisy. My point is to understand why it is not working to turn the volume down (gain lesser than 1).
I'm working with stereo 16 bit LE samples. Without applying gain, it works perfectly (recording and playback).
I thought that I should convert an integer sample to float; multiply by a gain factor and restore it to integer. But it is not working. And it seems to be the exact same approach for SoX in src/vol.c in function static int flow.
Below is my code (no additional libs used). The function playback is where I'm applying gain.
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include "/usr/include/sys/soundcard.h"
#include <sys/ioctl.h>
#include <sys/time.h>
#include <sys/stat.h> //man 2 chmod
#include <signal.h>
#define DEBUG 1
#define log(msg) if (DEBUG) printf("[LOG] %s\n",msg)
#define err(msg) {printf("[ERR] %s\n",msg); exit(1); }
const char *device = "/dev/dsp3.1"; //Audio device
char *rawFile = "/tmp/raw-file.wav"; //Raw file to record and playback
int fragmentSize = 256;
int b_continue = 1;
void signalHandler(int sigNum){
log("Signal captured");
b_continue = 0;
}
void configDevice(int fdDsp){
int ossCapabilities = 0;
if(fdDsp == -1)
err("can't open device");
if( ioctl(fdDsp, SNDCTL_DSP_GETCAPS, &ossCapabilities) == -1)
err("unsupported: SNDCTL_DSP_GETCAPS");
/*
* http://www.opensound.com/pguide/audio2.html
*/
if(ossCapabilities & DSP_CAP_TRIGGER != DSP_CAP_TRIGGER){
err("Triggering of recording/playback is not possible with this OSS device.");
}
if(ossCapabilities & DSP_CAP_REALTIME != DSP_CAP_REALTIME){
err("No DSP_CAP_REALTIME.");
}
if(ioctl(fdDsp, SNDCTL_DSP_SETDUPLEX, &ossCapabilities) == -1)
err("can't SNDCTL_DSP_SETDUPLEX");
if(ossCapabilities & DSP_CAP_DUPLEX != DSP_CAP_DUPLEX)
err("can't DSP_CAP_DUPLEX");
int format = AFMT_S16_LE; //set format
if(ioctl(fdDsp, SNDCTL_DSP_SETFMT, &format ) == -1){
err("Error setting format.");
}
int channels = 1; //mono=0 stereo=1
if(ioctl(fdDsp, SNDCTL_DSP_STEREO, &channels ) == -1){
err("Error setting channels." );
}
// FREQUENCY RATE
int speed = 44100;
if(ioctl(fdDsp, SNDCTL_DSP_SPEED, &speed ) == -1){
err("Error setting speed.");
}
// FRAGMENT SIZE
if(ioctl(fdDsp, SNDCTL_DSP_SETBLKSIZE, &fragmentSize) == -1){ //normalmente 2048 bits
err("Cannot SNDCTL_DSP_SETBLKSIZE.");
}
}
void record(){
int fdDsp = open(device, O_RDONLY);
configDevice(fdDsp);
//create file for writing
const int fdOutput = open(rawFile, O_WRONLY | O_CREAT, S_IWUSR | S_IRUSR);
if(fdOutput ==-1)
err("can't open file to write");
log("Recording...");
do{
// Triggers recording
int enableBits = PCM_ENABLE_INPUT;
if(ioctl(fdDsp, SNDCTL_DSP_SETTRIGGER, &enableBits) == -1)
err("Can't record: SNDCTL_DSP_SETTRIGGER");
int *buf[fragmentSize];
read(fdDsp, buf, fragmentSize);
write(fdOutput, buf, fragmentSize);
} while(b_continue == 1);
close(fdOutput);
close(fdDsp);
}
void playback(){
log("Opening file:");
log(rawFile);
log("On device:");
log(device);
int fdDsp = open(device, O_WRONLY);
configDevice(fdDsp);
const int fdInput = open(rawFile, O_RDONLY);
if(fdInput ==-1)
err("can't open file");
log("Playing...");
int eof = 0;
do{
// TRIGGERs PLAYBACK
int enableBits = PCM_ENABLE_OUTPUT;
if(ioctl(fdDsp, SNDCTL_DSP_SETTRIGGER, &enableBits) == -1){
err("Cannot SNDCTL_DSP_SETTRIGGER.");
}
int buf[fragmentSize];
eof = read(fdInput, buf, fragmentSize); //bytes read or -1 if EOF
// audio processing:
for(int i=0;i<fragmentSize;i++){
// learning how to get left and right channels from buffer
int l = (buf)[i] & 0xffff;
int r = ((buf)[i] >> 16) & 0xffff ;
// FIXME: it is causing distortion:
float fl = l;
float fr = r;
fl *= 1.0;
fr *= 0.3; //if different than 1, sounds distorted and noisy
l = fl;
r = fr;
// OK: unite Left and Right channels again
int lr = (l ) | (r << 16);
// OK: other options to mix these two channels:
int lleft = l; //Just the left channel
int rright = (r << 16); //Just the right channel
int lmono = (l << 16) | l; //Left ch. on both channels
int rmono = (r << 16) | r; //Right ch. on both channels
// the output:
(buf)[i] = lr;
}
write(fdDsp, buf, fragmentSize);
if(b_continue == 0) break;
} while(eof > 0);
close(fdInput);
close(fdDsp);
}
int main(int argc, char *argv[])
{
signal(SIGINT, signalHandler);
log("Ctrl^C to stop recording/playback");
record();
b_continue = 1; playback();
log("Stopped.");
return 0;
}
UPDATE:
As pointed out by CL, I was using the wrong type and the last parameter of read()/write() is greater than the size of the buffer.
So, in FreeBSD I changed the buffer type to int16_t (short) defined in #include <stdint.h> .
Now I can correctly apply a gain as desired:
float fl = l;
float fr = r;
fl *= 1.0f;
fr *= 1.5f;
l = fl;
r = fr;
I'll accept CL's answer.
Now the audio processing loop is working with one sample per time (left and right interleaved).
Updated code:
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include "/usr/include/sys/soundcard.h"
#include <sys/ioctl.h>
#include <sys/time.h>
#include <sys/stat.h> //man 2 chmod
#include <signal.h>
#include <stdint.h> //has type int16_t (short)
#define DEBUG 1
#define log(msg) if (DEBUG) printf("[LOG] %s\n",msg)
#define err(msg) {printf("[ERR] %s\n",msg); exit(1); }
const char *device = "/dev/dsp3.1"; //Audio device
char *rawFile = "/tmp/stereo.wav"; //Raw file to record and playback
int fragmentSize = 256;
int b_continue = 1;
void signalHandler(int sigNum){
log("Signal captured");
b_continue = 0;
}
void configDevice(int fdDsp){
int ossCapabilities = 0;
if(fdDsp == -1)
err("can't open device");
if( ioctl(fdDsp, SNDCTL_DSP_GETCAPS, &ossCapabilities) == -1)
err("unsupported: SNDCTL_DSP_GETCAPS");
/*
* http://www.opensound.com/pguide/audio2.html
*/
if(ossCapabilities & DSP_CAP_TRIGGER != DSP_CAP_TRIGGER){
err("Triggering of recording/playback is not possible with this OSS device.");
}
if(ossCapabilities & DSP_CAP_REALTIME != DSP_CAP_REALTIME){
err("No DSP_CAP_REALTIME.");
}
if(ioctl(fdDsp, SNDCTL_DSP_SETDUPLEX, &ossCapabilities) == -1)
err("can't SNDCTL_DSP_SETDUPLEX");
if(ossCapabilities & DSP_CAP_DUPLEX != DSP_CAP_DUPLEX)
err("can't DSP_CAP_DUPLEX");
int format = AFMT_S16_LE; //set format
if(ioctl(fdDsp, SNDCTL_DSP_SETFMT, &format ) == -1){
err("Error setting format.");
}
int channels = 1; //mono=0 stereo=1
if(ioctl(fdDsp, SNDCTL_DSP_STEREO, &channels ) == -1){
err("Error setting channels." );
}
// FREQUENCY RATE
int speed = 44100;
if(ioctl(fdDsp, SNDCTL_DSP_SPEED, &speed ) == -1){
err("Error setting speed.");
}
// FRAGMENT SIZE
if(ioctl(fdDsp, SNDCTL_DSP_SETBLKSIZE, &fragmentSize) == -1){ //normalmente 2048 bits
err("Cannot SNDCTL_DSP_SETBLKSIZE.");
}
}
void record(){
int fdDsp = open(device, O_RDONLY);
configDevice(fdDsp);
//create file for writing
const int fdOutput = open(rawFile, O_WRONLY | O_CREAT, S_IWUSR | S_IRUSR);
if(fdOutput ==-1)
err("can't open file to write");
log("Recording...");
do{
// Triggers recording
int enableBits = PCM_ENABLE_INPUT;
if(ioctl(fdDsp, SNDCTL_DSP_SETTRIGGER, &enableBits) == -1)
err("Can't record: SNDCTL_DSP_SETTRIGGER");
// Wrong:
// int *buf[fragmentSize];
// read(fdDsp, buf, fragmentSize);
// write(fdOutput, buf, fragmentSize);
int16_t *buf[fragmentSize/sizeof (int16_t)];
read(fdDsp, buf, fragmentSize/sizeof (int16_t));
write(fdOutput, buf, fragmentSize/sizeof (int16_t));
} while(b_continue == 1);
close(fdOutput);
close(fdDsp);
}
void playback(){
log("Opening file:");
log(rawFile);
log("On device:");
log(device);
int fdDsp = open(device, O_WRONLY);
configDevice(fdDsp);
const int fdInput = open(rawFile, O_RDONLY);
if(fdInput ==-1)
err("can't open file");
log("Playing...");
int eof = 0;
do{
// TRIGGERs PLAYBACK
int enableBits = PCM_ENABLE_OUTPUT;
if(ioctl(fdDsp, SNDCTL_DSP_SETTRIGGER, &enableBits) == -1){
err("Cannot SNDCTL_DSP_SETTRIGGER.");
}
//Wrong buffer type (too large) and wrong last parameter for read():
// int buf[fragmentSize];
// eof = read(fdInput, buf, fragmentSize);
int16_t buf[fragmentSize/sizeof (int16_t)];
eof = read(fdInput, buf, fragmentSize/sizeof (int16_t));
// audio processing:
for(int i=0;i<fragmentSize/sizeof (int16_t);i++){
int16_t l = buf[i];
int16_t r = buf[i+1];
// Using int16_t (short) buffer, gain works but stereo is inverted with factor >= 1.4f
float fl = l;
float fr = r;
fl *= 2.0f;
fr *= 3.0f;
l = fl;
r = fr;
// the output:
(buf)[i] = l;
i++;
(buf)[i] = r;
}
// write(fdDsp, buf, fragmentSize); //wrong
write(fdDsp, buf, fragmentSize/sizeof (int16_t));
if(b_continue == 0) break;
} while(eof > 0);
close(fdInput);
close(fdDsp);
}
int main(int argc, char *argv[])
{
signal(SIGINT, signalHandler);
log("Ctrl^C to stop recording/playback");
record();
b_continue = 1; playback();
log("Stopped.");
return 0;
}
Thanks,

The last parameter of read()/write() is the number of bytes, but an entry in buf[] has more than one byte.
In the two's complement representation of binary numbers, negative values are (or must be) sign extended, i.e., the most significant bits are ones. In this code, neither extracting L/R channels nor combining them works correctly for negative samples.
The easiest way of handling negative samples would be to use one array entry per sample, i.e., short int.

ffmpeg record video plays too fast

I'm a college student and I am studying FFmpeg now.
I have wrote a software that can record desktops and audio('virtual-audio-capturer') with FFmpeg.And I am now writing Audio and Video Synchronization.
I met some problems that video recording plays too fast.
When I look for audio and video synchronization help on the Internet，I find a formula for calculating PTS ：
pts = n * (（1 / timbase）/ fps)
When I use this formula,I find a phenomenon.
1.The higher frame rate is,the faster the video playback speed.
2.The slower the frame rate, the faster the video playback.
Also I find while the framerate is 10，the video playback speed will be right.
Why has this situation happened?
I have thought this question for three days. I really hope someone can help me solve this problem.
I really appreciate the help.
#include "stdafx.h"
#ifdef __cplusplus
extern "C"
{
#endif
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libavdevice/avdevice.h"
#include "libavutil/audio_fifo.h"
#include "libavfilter/buffersink.h"
#include "libavfilter/buffersrc.h"
#include "libavutil/imgutils.h"
#include "libavutil/mathematics.h"
#include "libavutil/samplefmt.h"
#include "libavutil/time.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "libavutil/file.h"
#include "libavutil/mem.h"
#include "libavutil/frame.h"
#include "libavfilter/avfilter.h"
#include "libswresample/swresample.h"
#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "avdevice.lib")
#pragma comment(lib, "avfilter.lib")
#pragma comment(lib, "avfilter.lib")
#pragma comment(lib, "postproc.lib")
#pragma comment(lib, "swresample.lib")
#pragma comment(lib, "swscale.lib")
#ifdef __cplusplus
};
#endif
AVFormatContext *pFormatCtx_Video = NULL, *pFormatCtx_Audio = NULL, *pFormatCtx_Out = NULL;
AVCodecContext *outVideoCodecCtx = NULL;
AVCodecContext *outAudioCodecCtx = NULL;
AVStream *pVideoStream = NULL, *pAudioStream = NULL;
AVCodec *outAVCodec;
AVCodec *outAudioCodec;
AVCodecContext *pCodecCtx_Video;
AVCodec *pCodec_Video;
AVFifoBuffer *fifo_video = NULL;
AVAudioFifo *fifo_audio = NULL;
int VideoIndex, AudioIndex;
int codec_id;
CRITICAL_SECTION AudioSection, VideoSection;
SwsContext *img_convert_ctx;
int frame_size = 0;
uint8_t *picture_buf = NULL, *frame_buf = NULL;
bool bCap = true;
DWORD WINAPI ScreenCapThreadProc( LPVOID lpParam );
DWORD WINAPI AudioCapThreadProc( LPVOID lpParam );
int OpenVideoCapture()
{
AVInputFormat *ifmt=av_find_input_format("gdigrab");
AVDictionary *options = NULL;
av_dict_set(&options, "framerate", "60", NULL);
if(avformat_open_input(&pFormatCtx_Video, "desktop", ifmt, &options)!=0)
{
printf("Couldn't open input stream.（无法打开视频输入流）\n");
return -1;
}
if(avformat_find_stream_info(pFormatCtx_Video,NULL)<0)
{
printf("Couldn't find stream information.（无法获取视频流信息）\n");
return -1;
}
if (pFormatCtx_Video->streams[0]->codec->codec_type != AVMEDIA_TYPE_VIDEO)
{
printf("Couldn't find video stream information.（无法获取视频流信息）\n");
return -1;
}
pCodecCtx_Video = pFormatCtx_Video->streams[0]->codec;
pCodec_Video = avcodec_find_decoder(pCodecCtx_Video->codec_id);
if(pCodec_Video == NULL)
{
printf("Codec not found.（没有找到解码器）\n");
return -1;
}
if(avcodec_open2(pCodecCtx_Video, pCodec_Video, NULL) < 0)
{
printf("Could not open codec.（无法打开解码器）\n");
return -1;
}
av_dump_format(pFormatCtx_Video, 0, NULL, 0);
img_convert_ctx = sws_getContext(pCodecCtx_Video->width, pCodecCtx_Video->height, pCodecCtx_Video->pix_fmt,
pCodecCtx_Video->width, pCodecCtx_Video->height, PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL);
frame_size = avpicture_get_size(pCodecCtx_Video->pix_fmt, pCodecCtx_Video->width, pCodecCtx_Video->height);
fifo_video = av_fifo_alloc(30 * avpicture_get_size(AV_PIX_FMT_YUV420P, pCodecCtx_Video->width, pCodecCtx_Video->height));
return 0;
}
static char *dup_wchar_to_utf8(wchar_t *w)
{
char *s = NULL;
int l = WideCharToMultiByte(CP_UTF8, 0, w, -1, 0, 0, 0, 0);
s = (char *) av_malloc(l);
if (s)
WideCharToMultiByte(CP_UTF8, 0, w, -1, s, l, 0, 0);
return s;
}
int OpenAudioCapture()
{
AVInputFormat *pAudioInputFmt = av_find_input_format("dshow");
char * psDevName = dup_wchar_to_utf8(L"audio=virtual-audio-capturer");
if (avformat_open_input(&pFormatCtx_Audio, psDevName, pAudioInputFmt,NULL) < 0)
{
printf("Couldn't open input stream.（无法打开音频输入流）\n");
return -1;
}
if(avformat_find_stream_info(pFormatCtx_Audio,NULL)<0)
return -1;
if(pFormatCtx_Audio->streams[0]->codec->codec_type != AVMEDIA_TYPE_AUDIO)
{
printf("Couldn't find video stream information.（无法获取音频流信息）\n");
return -1;
}
AVCodec *tmpCodec = avcodec_find_decoder(pFormatCtx_Audio->streams[0]->codec->codec_id);
if(0 > avcodec_open2(pFormatCtx_Audio->streams[0]->codec, tmpCodec, NULL))
{
printf("can not find or open audio decoder!\n");
}
av_dump_format(pFormatCtx_Audio, 0, NULL, 0);
return 0;
}
int OpenOutPut()
{
AVStream *pVideoStream = NULL, *pAudioStream = NULL;
const char *outFileName = "test.mp4";
avformat_alloc_output_context2(&pFormatCtx_Out, NULL, NULL, outFileName);
if (pFormatCtx_Video->streams[0]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
{
VideoIndex = 0;
pVideoStream = avformat_new_stream(pFormatCtx_Out, NULL);
if (!pVideoStream)
{
printf("can not new stream for output!\n");
return -1;
}
outVideoCodecCtx = avcodec_alloc_context3(outAVCodec);
if ( !outVideoCodecCtx )
{
printf("Error : avcodec_alloc_context3()\n");
return -1;
}
//set codec context param
outVideoCodecCtx = pVideoStream->codec;
outVideoCodecCtx->codec_id = AV_CODEC_ID_MPEG4;
outVideoCodecCtx->width = pFormatCtx_Video->streams[0]->codec->width;
outVideoCodecCtx->height = pFormatCtx_Video->streams[0]->codec->height;
outVideoCodecCtx->time_base = pFormatCtx_Video->streams[0]->codec->time_base;
outVideoCodecCtx->pix_fmt = AV_PIX_FMT_YUV420P;
outVideoCodecCtx->codec_type = AVMEDIA_TYPE_VIDEO;
if (codec_id == AV_CODEC_ID_H264)
{
av_opt_set(outVideoCodecCtx->priv_data, "preset", "slow", 0);
}
outAVCodec = avcodec_find_encoder(AV_CODEC_ID_MPEG4);
if( !outAVCodec )
{
printf("\n\nError : avcodec_find_encoder()");
return -1;
}
if (pFormatCtx_Out->oformat->flags & AVFMT_GLOBALHEADER)
outVideoCodecCtx->flags |=CODEC_FLAG_GLOBAL_HEADER;
if ((avcodec_open2(outVideoCodecCtx,outAVCodec, NULL)) < 0)
{
printf("can not open the encoder\n");
return -1;
}
}
if(pFormatCtx_Audio->streams[0]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
{
AVCodecContext *pOutputCodecCtx;
AudioIndex = 1;
pAudioStream = avformat_new_stream(pFormatCtx_Out, NULL);
pAudioStream->codec->codec = avcodec_find_encoder(pFormatCtx_Out->oformat->audio_codec);
pOutputCodecCtx = pAudioStream->codec;
pOutputCodecCtx->sample_rate = pFormatCtx_Audio->streams[0]->codec->sample_rate;
pOutputCodecCtx->channel_layout = pFormatCtx_Out->streams[0]->codec->channel_layout;
pOutputCodecCtx->channels = av_get_channel_layout_nb_channels(pAudioStream->codec->channel_layout);
if(pOutputCodecCtx->channel_layout == 0)
{
pOutputCodecCtx->channel_layout = AV_CH_LAYOUT_STEREO;
pOutputCodecCtx->channels = av_get_channel_layout_nb_channels(pOutputCodecCtx->channel_layout);
}
pOutputCodecCtx->sample_fmt = pAudioStream->codec->codec->sample_fmts[0];
AVRational time_base={1, pAudioStream->codec->sample_rate};
pAudioStream->time_base = time_base;
//audioCodecCtx->time_base = time_base;
pOutputCodecCtx->codec_tag = 0;
if (pFormatCtx_Out->oformat->flags & AVFMT_GLOBALHEADER)
pOutputCodecCtx->flags |= CODEC_FLAG_GLOBAL_HEADER;
if (avcodec_open2(pOutputCodecCtx, pOutputCodecCtx->codec, 0) < 0)
{
printf("编码器打开失败，退出程序\n");
return -1;
}
}
if (!(pFormatCtx_Out->oformat->flags & AVFMT_NOFILE))
{
if(avio_open(&pFormatCtx_Out->pb, outFileName, AVIO_FLAG_WRITE) < 0)
{
printf("can not open output file handle!\n");
return -1;
}
}
if(avformat_write_header(pFormatCtx_Out, NULL) < 0)
{
printf("can not write the header of the output file!\n");
return -1;
}
return 0;
}
int _tmain(int argc, _TCHAR* argv[])
{
av_register_all();
avdevice_register_all();
if (OpenVideoCapture() < 0)
{
return -1;
}
if (OpenAudioCapture() < 0)
{
return -1;
}
if (OpenOutPut() < 0)
{
return -1;
}
// int fps;
/*printf("输入帧率：");
scanf_s("%d",&fps);
if ( NULL == fps)
{
fps = 10;
}*/
InitializeCriticalSection(&VideoSection);
InitializeCriticalSection(&AudioSection);
AVFrame *picture = av_frame_alloc();
int size = avpicture_get_size(pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt,
pFormatCtx_Out->streams[VideoIndex]->codec->width, pFormatCtx_Out->streams[VideoIndex]->codec->height);
picture_buf = new uint8_t[size];
avpicture_fill((AVPicture *)picture, picture_buf,
pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt,
pFormatCtx_Out->streams[VideoIndex]->codec->width,
pFormatCtx_Out->streams[VideoIndex]->codec->height);
//star cap screen thread
CreateThread( NULL, 0, ScreenCapThreadProc, 0, 0, NULL);
//star cap audio thread
CreateThread( NULL, 0, AudioCapThreadProc, 0, 0, NULL);
int64_t cur_pts_v=0,cur_pts_a=0;
int VideoFrameIndex = 0, AudioFrameIndex = 0;
while(1)
{
if (_kbhit() != 0 && bCap)
{
bCap = false;
Sleep(2000);
}
if (fifo_audio && fifo_video)
{
int sizeAudio = av_audio_fifo_size(fifo_audio);
int sizeVideo = av_fifo_size(fifo_video);
//缓存数据写完就结束循环
if (av_audio_fifo_size(fifo_audio) <= pFormatCtx_Out->streams[AudioIndex]->codec->frame_size &&
av_fifo_size(fifo_video) <= frame_size && !bCap)
{
break;
}
}
if(av_compare_ts(cur_pts_v, pFormatCtx_Out->streams[VideoIndex]->time_base,
cur_pts_a,pFormatCtx_Out->streams[AudioIndex]->time_base) <= 0)
{
if (av_fifo_size(fifo_video) < frame_size && !bCap)
{
cur_pts_v = 0x7fffffffffffffff;
}
if(av_fifo_size(fifo_video) >= size)
{
EnterCriticalSection(&VideoSection);
av_fifo_generic_read(fifo_video, picture_buf, size, NULL); //将数据从avfifobuffer馈送到用户提供的回调。
LeaveCriticalSection(&VideoSection);
avpicture_fill((AVPicture *)picture, picture_buf,
pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt,
pFormatCtx_Out->streams[VideoIndex]->codec->width,
pFormatCtx_Out->streams[VideoIndex]->codec->height); //根据指定的图像参数和提供的图像数据缓冲区设置图片字段。
//pts = n * (（1 / timbase）/ fps);
//picture->pts = VideoFrameIndex * ((pFormatCtx_Video->streams[0]->time_base.den / pFormatCtx_Video->streams[0]->time_base.num) / 24);
picture->pts = VideoFrameIndex * ((outVideoCodecCtx->time_base.den * 100000 / outVideoCodecCtx->time_base.num) / 180);
int got_picture = 0;
AVPacket pkt;
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
//从帧中获取输入的原始视频数据
int ret = avcodec_encode_video2(pFormatCtx_Out->streams[VideoIndex]->codec, &pkt, picture, &got_picture);
if(ret < 0)
{
continue;
}
if (got_picture==1)
{
pkt.stream_index = VideoIndex;
/*int count = 1;
pkt.pts = pkt.dts = count * ((pFormatCtx_Video->streams[0]->time_base.den / pFormatCtx_Video->streams[0]->time_base.num) / 15);
count++;*/
//x = pts * (timebase1.num / timebase1.den )* (timebase2.den / timebase2.num);
pkt.pts = av_rescale_q_rnd(pkt.pts, pFormatCtx_Video->streams[0]->time_base,
pFormatCtx_Out->streams[VideoIndex]->time_base, (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
pkt.dts = av_rescale_q_rnd(pkt.dts, pFormatCtx_Video->streams[0]->time_base,
pFormatCtx_Out->streams[VideoIndex]->time_base, (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
pkt.duration = ((pFormatCtx_Out->streams[0]->time_base.den / pFormatCtx_Out->streams[0]->time_base.num) / 60);
//pkt.duration = 1000/60;
//pkt.pts = pkt.dts = Count * (ofmt_ctx->streams[stream_index]->time_base.den) /ofmt_ctx->streams[stream_index]->time_base.num / 10;
//Count++;
cur_pts_v = pkt.pts;
ret = av_interleaved_write_frame(pFormatCtx_Out, &pkt);
//delete[] pkt.data;
av_free_packet(&pkt);
}
VideoFrameIndex++;
}
}
else
{
if (NULL == fifo_audio)
{
continue;//还未初始化fifo
}
if (av_audio_fifo_size(fifo_audio) < pFormatCtx_Out->streams[AudioIndex]->codec->frame_size && !bCap)
{
cur_pts_a = 0x7fffffffffffffff;
}
if(av_audio_fifo_size(fifo_audio) >=
(pFormatCtx_Out->streams[AudioIndex]->codec->frame_size > 0 ? pFormatCtx_Out->streams[AudioIndex]->codec->frame_size : 1024))
{
AVFrame *frame;
frame = av_frame_alloc();
frame->nb_samples = pFormatCtx_Out->streams[AudioIndex]->codec->frame_size>0 ? pFormatCtx_Out->streams[AudioIndex]->codec->frame_size: 1024;
frame->channel_layout = pFormatCtx_Out->streams[AudioIndex]->codec->channel_layout;
frame->format = pFormatCtx_Out->streams[AudioIndex]->codec->sample_fmt;
frame->sample_rate = pFormatCtx_Out->streams[AudioIndex]->codec->sample_rate;
av_frame_get_buffer(frame, 0);
EnterCriticalSection(&AudioSection);
av_audio_fifo_read(fifo_audio, (void **)frame->data,
(pFormatCtx_Out->streams[AudioIndex]->codec->frame_size > 0 ? pFormatCtx_Out->streams[AudioIndex]->codec->frame_size : 1024));
LeaveCriticalSection(&AudioSection);
AVPacket pkt_out;
av_init_packet(&pkt_out);
int got_picture = -1;
pkt_out.data = NULL;
pkt_out.size = 0;
frame->pts = AudioFrameIndex * pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;
if (avcodec_encode_audio2(pFormatCtx_Out->streams[AudioIndex]->codec, &pkt_out, frame, &got_picture) < 0)
{
printf("can not decoder a frame");
}
av_frame_free(&frame);
if (got_picture)
{
pkt_out.stream_index = AudioIndex;
pkt_out.pts = AudioFrameIndex * pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;
pkt_out.dts = AudioFrameIndex * pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;
pkt_out.duration = pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;
cur_pts_a = pkt_out.pts;
int ret = av_interleaved_write_frame(pFormatCtx_Out, &pkt_out);
av_free_packet(&pkt_out);
}
AudioFrameIndex++;
}
}
}
delete[] picture_buf;
av_fifo_free(fifo_video);
av_audio_fifo_free(fifo_audio);
av_write_trailer(pFormatCtx_Out);
avio_close(pFormatCtx_Out->pb);
avformat_free_context(pFormatCtx_Out);
if (pFormatCtx_Video != NULL)
{
avformat_close_input(&pFormatCtx_Video);
pFormatCtx_Video = NULL;
}
if (pFormatCtx_Audio != NULL)
{
avformat_close_input(&pFormatCtx_Audio);
pFormatCtx_Audio = NULL;
}
return 0;
}
DWORD WINAPI ScreenCapThreadProc( LPVOID lpParam )
{
AVPacket packet;
int got_picture;
AVFrame *pFrame;
pFrame=av_frame_alloc();
AVFrame *picture = av_frame_alloc();
int size = avpicture_get_size(pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt,
pFormatCtx_Out->streams[VideoIndex]->codec->width,
pFormatCtx_Out->streams[VideoIndex]->codec->height);
avpicture_fill((AVPicture *)picture, picture_buf,
pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt,
pFormatCtx_Out->streams[VideoIndex]->codec->width,
pFormatCtx_Out->streams[VideoIndex]->codec->height);
FILE *p = NULL;
p = fopen("proc_test.yuv", "wb+");
av_init_packet(&packet);
int height = pFormatCtx_Out->streams[VideoIndex]->codec->height;
int width = pFormatCtx_Out->streams[VideoIndex]->codec->width;
int y_size=height*width;
while(bCap)
{
packet.data = NULL;
packet.size = 0;
if (av_read_frame(pFormatCtx_Video, &packet) < 0)
{
continue;
}
if(packet.stream_index == 0)
{
if (avcodec_decode_video2(pCodecCtx_Video, pFrame, &got_picture, &packet) < 0)
{
printf("Decode Error.（解码错误）\n");
continue;
}
if (got_picture)
{
sws_scale(img_convert_ctx,
(const uint8_t* const*)pFrame->data,
pFrame->linesize,
0,
pFormatCtx_Out->streams[VideoIndex]->codec->height,
picture->data,
picture->linesize);
if (av_fifo_space(fifo_video) >= size)
{
EnterCriticalSection(&VideoSection);
av_fifo_generic_write(fifo_video, picture->data[0], y_size, NULL);
av_fifo_generic_write(fifo_video, picture->data[1], y_size/4, NULL);
av_fifo_generic_write(fifo_video, picture->data[2], y_size/4, NULL);
LeaveCriticalSection(&VideoSection);
}
}
}
av_free_packet(&packet);
}
av_frame_free(&pFrame);
av_frame_free(&picture);
return 0;
}
DWORD WINAPI AudioCapThreadProc( LPVOID lpParam )
{
AVPacket pkt;
AVFrame *frame;
frame = av_frame_alloc();
int gotframe;
while(bCap)
{
pkt.data = NULL;
pkt.size = 0;
if(av_read_frame(pFormatCtx_Audio,&pkt) < 0)
{
continue;
}
if (avcodec_decode_audio4(pFormatCtx_Audio->streams[0]->codec, frame, &gotframe, &pkt) < 0)
{
av_frame_free(&frame);
printf("can not decoder a frame");
break;
}
av_free_packet(&pkt);
if (!gotframe)
{
printf("没有获取到数据，继续下一次");
continue;
}
if (NULL == fifo_audio)
{
fifo_audio = av_audio_fifo_alloc(pFormatCtx_Audio->streams[0]->codec->sample_fmt,
pFormatCtx_Audio->streams[0]->codec->channels, 30 * frame->nb_samples);
}
int buf_space = av_audio_fifo_space(fifo_audio);
if (av_audio_fifo_space(fifo_audio) >= frame->nb_samples)
{
EnterCriticalSection(&AudioSection);
av_audio_fifo_write(fifo_audio, (void **)frame->data, frame->nb_samples);
LeaveCriticalSection(&AudioSection);
}
}
av_frame_free(&frame);
return 0;
}
Maybe there is another way to calculate PTS and DTS
I hope whatever the frame rate is,video playback speed is right.Not too fast or too slow.

Finally, I found out the reason for the problem.
The frame rate of video generated by video recording is determined by the recorded video stream.My computer's maximum frame rate for gdigrab is 10 frames,so if I set it more than 10 frames, the playback speed will be fast.And if i set it less than 10 frames, the playback speed will be slow.But i run code on my computer where I play games.Whatever i choose 10 frames or 15 frames,the palyback speed will be correct.
Still,i don't know the reason why my gdigrab's framerate is only 0-10 frames.There are a number of factors that affect the frame rate of video recording，such as CPU Graphics， card，display and Memory.
Here is the final code
capture screen and audio with FFMPEG
#include "stdafx.h"
#ifdef __cplusplus
extern "C"
{
#endif
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libavdevice/avdevice.h"
#include "libavutil/audio_fifo.h"
#include "libavfilter/buffersink.h"
#include "libavfilter/buffersrc.h"
#include "libavutil/imgutils.h"
#include "libavutil/mathematics.h"
#include "libavutil/samplefmt.h"
#include "libavutil/time.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "libavutil/file.h"
#include "libavutil/mem.h"
#include "libavutil/frame.h"
#include "libavfilter/avfilter.h"
#include "libswresample/swresample.h"
#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "avdevice.lib")
#pragma comment(lib, "avfilter.lib")
#pragma comment(lib, "avfilter.lib")
#pragma comment(lib, "postproc.lib")
#pragma comment(lib, "swresample.lib")
#pragma comment(lib, "swscale.lib")
#ifdef __cplusplus
};
#endif
AVFormatContext *pFormatCtx_Video = NULL, *pFormatCtx_Audio = NULL, *pFormatCtx_Out = NULL;
AVCodecContext *outVideoCodecCtx = NULL;
AVCodecContext *outAudioCodecCtx = NULL;
AVStream *pVideoStream = NULL, *pAudioStream = NULL;
AVCodec *outAVCodec;
AVCodec *outAudioCodec;
AVCodecContext *pCodecCtx_Video;
AVCodec *pCodec_Video;
AVFifoBuffer *fifo_video = NULL;
AVAudioFifo *fifo_audio = NULL;
int VideoIndex, AudioIndex;
int codec_id;
CRITICAL_SECTION AudioSection, VideoSection;
SwsContext *img_convert_ctx;
int frame_size = 0;
uint8_t *picture_buf = NULL;
bool bCap = true;
DWORD WINAPI ScreenCapThreadProc( LPVOID lpParam );
DWORD WINAPI AudioCapThreadProc( LPVOID lpParam );
static char *dup_wchar_to_utf8(wchar_t *w)
{
char *s = NULL;
int l = WideCharToMultiByte(CP_UTF8, 0, w, -1, 0, 0, 0, 0);
s = (char *) av_malloc(l);
if (s)
WideCharToMultiByte(CP_UTF8, 0, w, -1, s, l, 0, 0);
return s;
}
int OpenVideoCapture()
{
int fps = 10;
char opt;
printf("选择视频播放帧率：\n");
printf("A 5帧\n");
printf("B 10帧\n");
printf("C 15帧\n");
printf("D 20帧\n");
printf("E 25帧\n");
printf("F 30帧\n");
opt = getchar();
AVDictionary *options = NULL;
switch (opt)
{
case 'A':
fps = 5;
av_dict_set(&options, "framerate", "5", 0);
break;
case 'a':
fps = 5;
av_dict_set(&options, "framerate", "5", 0);
break;
case 'B':
fps = 10;
av_dict_set(&options, "framerate", "10", 0);
break;
case 'b':
fps = 10;
av_dict_set(&options, "framerate", "10", 0);
break;
case 'C':
fps = 15;
av_dict_set(&options, "framerate", "15", 0);
break;
case 'c':
fps = 15;
av_dict_set(&options, "framerate", "15", 0);
break;
case 'D':
fps = 20;
av_dict_set(&options, "framerate", "20", 0);
break;
case 'd':
fps = 20;
av_dict_set(&options, "framerate", "20", 0);
break;
case 'E':
fps = 25;
av_dict_set(&options, "framerate", "25", 0);
break;
case 'e':
fps = 25;
av_dict_set(&options, "framerate", "25", 0);
break;
case 'F':
fps = 30;
av_dict_set(&options, "framerate", "30", 0);
break;
case 'f':
fps = 30;
av_dict_set(&options, "framerate", "30", 0);
break;
default:
printf("选项输入错误\n");
return -1;
}
//AVDictionary *options = NULL;
//av_dict_set(&options, "framerate", "15", 0);
AVInputFormat *ifmt=av_find_input_format("gdigrab");
if(avformat_open_input(&pFormatCtx_Video, "desktop", ifmt, &options) != 0)
{
printf("Couldn't open input stream.（无法打开视频输入流）\n");
return -1;
}
if(avformat_find_stream_info(pFormatCtx_Video,NULL)<0)
{
printf("Couldn't find stream information.（无法获取视频流信息）\n");
return -1;
}
if (pFormatCtx_Video->streams[0]->codec->codec_type != AVMEDIA_TYPE_VIDEO)
{
printf("Couldn't find video stream information.（无法获取视频流信息）\n");
return -1;
}
pCodecCtx_Video = pFormatCtx_Video->streams[0]->codec;
pCodec_Video = avcodec_find_decoder(pCodecCtx_Video->codec_id);
if(pCodec_Video == NULL)
{
printf("Codec not found.（没有找到解码器）\n");
return -1;
}
if(avcodec_open2(pCodecCtx_Video, pCodec_Video, NULL) < 0)
{
printf("Could not open codec.（无法打开解码器）\n");
return -1;
}
av_dump_format(pFormatCtx_Video, 0, NULL, 0);
img_convert_ctx = sws_getContext(pCodecCtx_Video->width,
pCodecCtx_Video->height,
pCodecCtx_Video->pix_fmt,
pCodecCtx_Video->width,
pCodecCtx_Video->height,
PIX_FMT_YUV420P,
SWS_BICUBIC, NULL, NULL, NULL);
frame_size = avpicture_get_size(pCodecCtx_Video->pix_fmt, pCodecCtx_Video->width, pCodecCtx_Video->height);
fifo_video = av_fifo_alloc(60 * avpicture_get_size(AV_PIX_FMT_YUV420P, pCodecCtx_Video->width, pCodecCtx_Video->height));
av_dict_free(&options);
return 0;
}
int OpenAudioCapture()
{
AVInputFormat *pAudioInputFmt = av_find_input_format("dshow");
AVDictionary *opt = NULL;
char * psDevName = dup_wchar_to_utf8(L"audio=virtual-audio-capturer");
if (avformat_open_input(&pFormatCtx_Audio, psDevName, pAudioInputFmt, &opt) < 0)
{
printf("Couldn't open input stream.（无法打开音频输入流）\n");
return -1;
}
if(avformat_find_stream_info(pFormatCtx_Audio,NULL)<0)
return -1;
if(pFormatCtx_Audio->streams[0]->codec->codec_type != AVMEDIA_TYPE_AUDIO)
{
printf("Couldn't find video stream information.（无法获取音频流信息）\n");
return -1;
}
AVCodec *tmpCodec = avcodec_find_decoder(pFormatCtx_Audio->streams[0]->codec->codec_id);
if(0 > avcodec_open2(pFormatCtx_Audio->streams[0]->codec, tmpCodec, NULL))
{
printf("can not find or open audio decoder!\n");
}
av_dump_format(pFormatCtx_Audio, 0, NULL, 0);
return 0;
}
int OpenOutPut()
{
AVStream *pVideoStream = NULL, *pAudioStream = NULL;
const char *outFileName = "test.mp4";
avformat_alloc_output_context2(&pFormatCtx_Out, NULL, NULL, outFileName);
if (pFormatCtx_Video->streams[0]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
{
VideoIndex = 0;
pVideoStream = avformat_new_stream(pFormatCtx_Out, NULL);
if (!pVideoStream)
{
printf("can not new stream for output!\n");
return -1;
}
outVideoCodecCtx = avcodec_alloc_context3(outAVCodec);
if ( !outVideoCodecCtx )
{
printf("Error : avcodec_alloc_context3()\n");
return -1;
}
outVideoCodecCtx = pVideoStream->codec;
outVideoCodecCtx->codec_id = AV_CODEC_ID_MPEG4;
outVideoCodecCtx->width = pFormatCtx_Video->streams[0]->codec->width;
outVideoCodecCtx->height = pFormatCtx_Video->streams[0]->codec->height;
outVideoCodecCtx->time_base = pFormatCtx_Video->streams[0]->codec->time_base;;
outVideoCodecCtx->pix_fmt = AV_PIX_FMT_YUV420P;
outVideoCodecCtx->codec_type = AVMEDIA_TYPE_VIDEO;
if (codec_id == AV_CODEC_ID_H264)
{
av_opt_set(outVideoCodecCtx->priv_data, "preset", "slow", 0);
}
outAVCodec = avcodec_find_encoder(AV_CODEC_ID_MPEG4);
if( !outAVCodec )
{
printf("\n\nError : avcodec_find_encoder()");
return -1;
}
if (pFormatCtx_Out->oformat->flags & AVFMT_GLOBALHEADER)
outVideoCodecCtx->flags |=CODEC_FLAG_GLOBAL_HEADER;
if ((avcodec_open2(outVideoCodecCtx,outAVCodec, NULL)) < 0)
{
printf("can not open the encoder\n");
return -1;
}
}
if(pFormatCtx_Audio->streams[0]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
{
AVCodecContext *pOutputCodecCtx;
AudioIndex = 1;
pAudioStream = avformat_new_stream(pFormatCtx_Out, NULL);
pAudioStream->codec->codec = avcodec_find_encoder(pFormatCtx_Out->oformat->audio_codec);
pOutputCodecCtx = pAudioStream->codec;
pOutputCodecCtx->sample_rate = pFormatCtx_Audio->streams[0]->codec->sample_rate;
pOutputCodecCtx->channel_layout = pFormatCtx_Out->streams[0]->codec->channel_layout;
pOutputCodecCtx->channels = av_get_channel_layout_nb_channels(pAudioStream->codec->channel_layout);
if(pOutputCodecCtx->channel_layout == 0)
{
pOutputCodecCtx->channel_layout = AV_CH_LAYOUT_STEREO;
pOutputCodecCtx->channels = av_get_channel_layout_nb_channels(pOutputCodecCtx->channel_layout);
}
pOutputCodecCtx->sample_fmt = pAudioStream->codec->codec->sample_fmts[0];
AVRational time_base={1, pAudioStream->codec->sample_rate};
pAudioStream->time_base = time_base;
pOutputCodecCtx->codec_tag = 0;
if (pFormatCtx_Out->oformat->flags & AVFMT_GLOBALHEADER)
pOutputCodecCtx->flags |= CODEC_FLAG_GLOBAL_HEADER;
if (avcodec_open2(pOutputCodecCtx, pOutputCodecCtx->codec, 0) < 0)
{
printf("编码器打开失败，退出程序\n");
return -1;
}
}
if (!(pFormatCtx_Out->oformat->flags & AVFMT_NOFILE))
{
if(avio_open(&pFormatCtx_Out->pb, outFileName, AVIO_FLAG_WRITE) < 0)
{
printf("can not open output file handle!\n");
return -1;
}
}
if(avformat_write_header(pFormatCtx_Out, NULL) < 0)
{
printf("can not write the header of the output file!\n");
return -1;
}
return 0;
}
int _tmain(int argc, _TCHAR* argv[])
{
av_register_all();
avdevice_register_all();
if (OpenVideoCapture() < 0)
{
return -1;
}
if (OpenAudioCapture() < 0)
{
return -1;
}
if (OpenOutPut() < 0)
{
return -1;
}
InitializeCriticalSection(&VideoSection);
InitializeCriticalSection(&AudioSection);
AVFrame *picture = av_frame_alloc();
int size = avpicture_get_size(pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt,
pFormatCtx_Out->streams[VideoIndex]->codec->width, pFormatCtx_Out->streams[VideoIndex]->codec->height);
picture_buf = new uint8_t[size];
avpicture_fill((AVPicture *)picture, picture_buf,
pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt,
pFormatCtx_Out->streams[VideoIndex]->codec->width,
pFormatCtx_Out->streams[VideoIndex]->codec->height);
//star cap screen thread
CreateThread( NULL, 0, ScreenCapThreadProc, 0, 0, NULL);
//star cap audio thread
CreateThread( NULL, 0, AudioCapThreadProc, 0, 0, NULL);
int64_t cur_pts_v=0,cur_pts_a=0;
int64_t VideoFrameIndex = 0, AudioFrameIndex = 0;
int64_t count = 1;
int64_t video_pre_pts = 0;
while(1)
{
if (_kbhit() != 0 && bCap)
{
bCap = false;
Sleep(2000);
}
if (fifo_audio && fifo_video)
{
int sizeAudio = av_audio_fifo_size(fifo_audio);
int sizeVideo = av_fifo_size(fifo_video);
//缓存数据写完就结束循环
if (av_audio_fifo_size(fifo_audio) <= pFormatCtx_Out->streams[AudioIndex]->codec->frame_size &&
av_fifo_size(fifo_video) <= frame_size && !bCap)
{
break;
}
}
if(av_compare_ts(cur_pts_v, pFormatCtx_Out->streams[VideoIndex]->time_base, cur_pts_a,pFormatCtx_Out->streams[AudioIndex]->time_base) <= 0)
{
if (av_fifo_size(fifo_video) < frame_size && !bCap)
{
cur_pts_v = 0x7fffffffffffffff;
}
if(av_fifo_size(fifo_video) >= size)
{
//将数据从avfifobuffer馈送到用户提供的回调。
EnterCriticalSection(&VideoSection);
av_fifo_generic_read(fifo_video, picture_buf, size, NULL);
LeaveCriticalSection(&VideoSection);
//根据指定的图像参数和提供的图像数据缓冲区设置图片字段。
avpicture_fill((AVPicture *)picture, picture_buf,
pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt,
pFormatCtx_Out->streams[VideoIndex]->codec->width,
pFormatCtx_Out->streams[VideoIndex]->codec->height);
//pts = n * (（1 / timbase）/ fps);
//picture->pts = VideoFrameIndex * ((pFormatCtx_Video->streams[0]->time_base.den / pFormatCtx_Video->streams[0]->time_base.num) / 15);
picture->pts = av_rescale_q(VideoFrameIndex,outVideoCodecCtx->time_base,pFormatCtx_Video->streams[0]->time_base);
printf("picture->pts: %d\n",picture->pts);
int got_picture = 0;
AVPacket pkt;
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
//从帧中获取输入的原始视频数据
int ret = avcodec_encode_video2(pFormatCtx_Out->streams[VideoIndex]->codec, &pkt, picture, &got_picture);
if(ret < 0)
{
continue;
}
if (got_picture==1)
{
pkt.stream_index = VideoIndex;
//pFormatCtx_Video //pFormatCtx_Out
pkt.pts = av_rescale_q_rnd(pkt.pts, pFormatCtx_Video->streams[0]->time_base, pFormatCtx_Out->streams[VideoIndex]->time_base, (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
printf("pkt.pts = %d\n",pkt.pts);
pkt.dts = av_rescale_q_rnd(pkt.dts, pFormatCtx_Video->streams[0]->time_base, pFormatCtx_Out->streams[VideoIndex]->time_base, (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
printf("pkt.dts = %d\n",pkt.dts);
pkt.duration = ((pFormatCtx_Out->streams[0]->time_base.den / pFormatCtx_Out->streams[0]->time_base.num) / outVideoCodecCtx->time_base.den);
//pkt.duration = 1;
//pkt.duration = av_rescale_q(pkt.duration,outVideoCodecCtx->time_base,pFormatCtx_Video->streams[0]->time_base);
printf("pkt.duration = %d\n",pkt.duration);
pkt.pos = -1;
cur_pts_v = pkt.pts;
ret = av_interleaved_write_frame(pFormatCtx_Out, &pkt);
if (ret<0)
{
printf("Error muxing packet\n");
break;
}
av_free_packet(&pkt);
}
VideoFrameIndex++;
}
}
else
{
if (NULL == fifo_audio)
{
continue;//还未初始化fifo
}
if (av_audio_fifo_size(fifo_audio) < pFormatCtx_Out->streams[AudioIndex]->codec->frame_size && !bCap)
{
cur_pts_a = 0x7fffffffffffffff;
}
if(av_audio_fifo_size(fifo_audio) >=
(pFormatCtx_Out->streams[AudioIndex]->codec->frame_size > 0 ? pFormatCtx_Out->streams[AudioIndex]->codec->frame_size : 1024))
{
AVFrame *frame;
frame = av_frame_alloc();
frame->nb_samples = pFormatCtx_Out->streams[AudioIndex]->codec->frame_size>0 ? pFormatCtx_Out->streams[AudioIndex]->codec->frame_size: 1024;
frame->channel_layout = pFormatCtx_Out->streams[AudioIndex]->codec->channel_layout;
frame->format = pFormatCtx_Out->streams[AudioIndex]->codec->sample_fmt;
frame->sample_rate = pFormatCtx_Out->streams[AudioIndex]->codec->sample_rate;
av_frame_get_buffer(frame, 0);
EnterCriticalSection(&AudioSection);
av_audio_fifo_read(fifo_audio, (void **)frame->data,
(pFormatCtx_Out->streams[AudioIndex]->codec->frame_size > 0 ? pFormatCtx_Out->streams[AudioIndex]->codec->frame_size : 1024));
LeaveCriticalSection(&AudioSection);
AVPacket pkt_out;
av_init_packet(&pkt_out);
int got_picture = -1;
pkt_out.data = NULL;
pkt_out.size = 0;
frame->pts = AudioFrameIndex * pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;
if (avcodec_encode_audio2(pFormatCtx_Out->streams[AudioIndex]->codec, &pkt_out, frame, &got_picture) < 0)
{
printf("can not decoder a frame");
}
av_frame_free(&frame);
if (got_picture)
{
pkt_out.stream_index = AudioIndex;
pkt_out.pts = AudioFrameIndex * pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;
pkt_out.dts = AudioFrameIndex * pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;
pkt_out.duration = pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;
cur_pts_a = pkt_out.pts;
int ret = av_interleaved_write_frame(pFormatCtx_Out, &pkt_out);
if (ret<0)
{
printf("Error muxing packet\n");
break;
}
av_free_packet(&pkt_out);
}
AudioFrameIndex++;
}
}
}
delete[] picture_buf;
av_fifo_free(fifo_video);
av_audio_fifo_free(fifo_audio);
av_write_trailer(pFormatCtx_Out);
avio_close(pFormatCtx_Out->pb);
avformat_free_context(pFormatCtx_Out);
if (pFormatCtx_Video != NULL)
{
avformat_close_input(&pFormatCtx_Video);
pFormatCtx_Video = NULL;
}
if (pFormatCtx_Audio != NULL)
{
avformat_close_input(&pFormatCtx_Audio);
pFormatCtx_Audio = NULL;
}
return 0;
}
DWORD WINAPI ScreenCapThreadProc( LPVOID lpParam )
{
AVPacket packet;
int got_picture;
AVFrame *pFrame;
pFrame=av_frame_alloc();
AVFrame *picture = av_frame_alloc();
int size = avpicture_get_size(pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt,
pFormatCtx_Out->streams[VideoIndex]->codec->width,
pFormatCtx_Out->streams[VideoIndex]->codec->height);
avpicture_fill((AVPicture *)picture, picture_buf,
pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt,
pFormatCtx_Out->streams[VideoIndex]->codec->width,
pFormatCtx_Out->streams[VideoIndex]->codec->height);
av_init_packet(&packet);
int height = pFormatCtx_Out->streams[VideoIndex]->codec->height;
int width = pFormatCtx_Out->streams[VideoIndex]->codec->width;
int y_size=height*width;
while(bCap)
{
packet.data = NULL;
packet.size = 0;
if (av_read_frame(pFormatCtx_Video, &packet) < 0)
{
continue;
}
if(packet.stream_index == 0)
{
if (avcodec_decode_video2(pCodecCtx_Video, pFrame, &got_picture, &packet) < 0)
{
printf("Decode Error.（解码错误）\n");
continue;
}
if (got_picture)
{
sws_scale(img_convert_ctx,
(const uint8_t* const*)pFrame->data,
pFrame->linesize,
0,
pFormatCtx_Out->streams[VideoIndex]->codec->height,
picture->data,
picture->linesize);
if (av_fifo_space(fifo_video) >= size)
{
EnterCriticalSection(&VideoSection);
av_fifo_generic_write(fifo_video, picture->data[0], y_size, NULL);
av_fifo_generic_write(fifo_video, picture->data[1], y_size/4, NULL);
av_fifo_generic_write(fifo_video, picture->data[2], y_size/4, NULL);
LeaveCriticalSection(&VideoSection);
}
}
}
av_free_packet(&packet);
}
av_frame_free(&pFrame);
av_frame_free(&picture);
return 0;
}
DWORD WINAPI AudioCapThreadProc( LPVOID lpParam )
{
AVPacket pkt;
AVFrame *frame;
frame = av_frame_alloc();
int gotframe;
while(bCap)
{
pkt.data = NULL;
pkt.size = 0;
if(av_read_frame(pFormatCtx_Audio,&pkt) < 0)
{
continue;
}
if (avcodec_decode_audio4(pFormatCtx_Audio->streams[0]->codec, frame, &gotframe, &pkt) < 0)
{
av_frame_free(&frame);
printf("can not decoder a frame");
break;
}
av_free_packet(&pkt);
if (!gotframe)
{
printf("没有获取到数据，继续下一次");
continue;
}
if (NULL == fifo_audio)
{
fifo_audio = av_audio_fifo_alloc(pFormatCtx_Audio->streams[0]->codec->sample_fmt,
pFormatCtx_Audio->streams[0]->codec->channels, 30 * frame->nb_samples);
}
int buf_space = av_audio_fifo_space(fifo_audio);
if (av_audio_fifo_space(fifo_audio) >= frame->nb_samples)
{
EnterCriticalSection(&AudioSection);
av_audio_fifo_write(fifo_audio, (void **)frame->data, frame->nb_samples);
LeaveCriticalSection(&AudioSection);
}
}
av_frame_free(&frame);
return 0;
}

FFMPEG AAC encoder issue

I am trying to capture and encode audio data, I am encoding audio using
FFMPEG AAC and to capture PCM data I used ALSA, Capturing part is working in my case, However, AAC encoder is not working.
I am trying to play test.aac file using
ffplay test.aac
but it contains lots of noise.
Attaching code for aac encoder :
#include "AudioEncoder.h"
void* AudioEncoder::run(void *ctx)
{
return ((AudioEncoder *)ctx)->execute();
}
static int frameCount = 0;
void* AudioEncoder::execute(void)
{
float buf[size], *temp;
int totalSize = 0;
int fd = open("in.pcm", O_CREAT| O_RDWR, 0666);
int frameSize = 128 * snd_pcm_format_width(SND_PCM_FORMAT_FLOAT) / 8 * 2;
av_new_packet(&pkt,size);
cout << size << endl;
while (!Main::stopThread)
{
temp = (Main::fbAudio)->dequeue();
memcpy(buf + totalSize, temp, frameSize);
write(fd, temp, frameSize); // Can play in.pcm with no noise in it.
totalSize += frameSize;
delete temp;
if (totalSize >= size)
{
totalSize = 0;
//frame_buf = (uint8_t *) buf;
pFrame->data[0] = (uint8_t *)buf; //PCM Data
pFrame->pts=frameCount;
frameCount++;
got_frame=0;
//Encode
ret = avcodec_encode_audio2(pCodecCtx, &pkt,pFrame, &got_frame);
if(ret < 0){
cerr << "Failed to encode!\n";
return NULL;
}
if (got_frame==1){
printf("Succeed to encode 1 frame! \tsize:%5d\n",pkt.size);
pkt.stream_index = audio_st->index;
#ifdef DUMP_TEST
ret = av_write_frame(pFormatCtx, &pkt);
#endif
av_free_packet(&pkt);
}
//memset(buf, 0, sizeof(float)*size);
}
//delete temp;
//if (buf.size() >= m_audio_output_decoder_ctx->frame_size)
/* encode the audio*/
}
close(fd);
Main::stopThread = true;
return NULL;
}
int AudioEncoder::flush_encoder(AVFormatContext *fmt_ctx,unsigned int stream_index){
int ret;
int got_frame;
AVPacket enc_pkt;
if (!(fmt_ctx->streams[stream_index]->codec->codec->capabilities &
CODEC_CAP_DELAY))
return 0;
while (1) {
enc_pkt.data = NULL;
enc_pkt.size = 0;
av_init_packet(&enc_pkt);
ret = avcodec_encode_audio2 (fmt_ctx->streams[stream_index]->codec, &enc_pkt,
NULL, &got_frame);
av_frame_free(NULL);
if (ret < 0)
break;
if (!got_frame){
ret=0;
break;
}
printf("Flush Encoder: Succeed to encode 1 frame!\tsize:%5d\n",enc_pkt.size);
/* mux encoded frame */
#ifdef DUMP_TEST
ret = av_write_frame(fmt_ctx, &enc_pkt);
if (ret < 0)
break;
#endif
}
return ret;
}
void AudioEncoder::start(void)
{
pthread_t encoder;
pthread_create(&encoder, NULL, &AudioEncoder::run, this);
}
AudioEncoder::AudioEncoder() : out_file("test.aac")
{
got_frame = 0;
ret = 0;
size = 0;
av_register_all();
avcodec_register_all();
//Method 1.
pFormatCtx = avformat_alloc_context();
fmt = av_guess_format(NULL, out_file, NULL);
pFormatCtx->oformat = fmt;
#ifdef DUMP_TEST
if (avio_open(&pFormatCtx->pb,out_file, AVIO_FLAG_READ_WRITE) < 0){
cerr << "Failed to open output file!\n";
return;
}
#endif
audio_st = avformat_new_stream(pFormatCtx, 0);
if (audio_st==NULL){
return;
}
pCodecCtx = audio_st->codec;
pCodecCtx->codec_id = fmt->audio_codec;
pCodecCtx->codec_type = AVMEDIA_TYPE_AUDIO;
pCodecCtx->sample_fmt = AV_SAMPLE_FMT_FLTP;
pCodecCtx->sample_rate= 8000;
pCodecCtx->channel_layout=AV_CH_LAYOUT_STEREO;
pCodecCtx->channels = av_get_channel_layout_nb_channels(pCodecCtx->channel_layout);
// pCodecCtx->bit_rate = 64000;
#ifdef DUMP_TEST
//Show some information
av_dump_format(pFormatCtx, 0, out_file, 1);
#endif
pCodec = avcodec_find_encoder(pCodecCtx->codec_id);
if (!pCodec){
printf("Can not find encoder!\n");
return;
}
if (avcodec_open2(pCodecCtx, pCodec,NULL) < 0){
printf("Failed to open encoder!\n");
return;
}
pFrame = av_frame_alloc();
pFrame->nb_samples= pCodecCtx->frame_size;
pFrame->format= pCodecCtx->sample_fmt;
size = av_samples_get_buffer_size(NULL, pCodecCtx->channels,pCodecCtx->frame_size,pCodecCtx->sample_fmt, 1);
frame_buf = (uint8_t *)av_malloc(size);
avcodec_fill_audio_frame(pFrame, pCodecCtx->channels, pCodecCtx->sample_fmt,(const uint8_t*)frame_buf, size, 1);
//Write Header
#ifdef DUMP_TEST
avformat_write_header(pFormatCtx,NULL);
#endif
}
AudioEncoder::~AudioEncoder()
{
//Flush Encoder
ret = flush_encoder(pFormatCtx,0);
if (ret < 0) {
cerr << "Flushing encoder failed\n";
return;
}
#ifdef DUMP_TEST
//Write Trailer
av_write_trailer(pFormatCtx);
#endif
//Clean
if (audio_st){
avcodec_close(audio_st->codec);
av_free(pFrame);
av_free(frame_buf);
}
avio_close(pFormatCtx->pb);
avformat_free_context(pFormatCtx);
}
Here, please ignore DUMP_TEST flag, I already enabled it.
Can some one tell me what is issue ?
Thanks,
Harshil

I am able to resolve this issue, by correctly passing buffer from ALSA to AAC encoder.
Here AAC expects buffer size of 4096 bytes, but from deque I am passing 1024 bytes which causes issue, also I updated audio channels to MONO, in place of STEREO. Attaching my working code snippet for more information:
#include "AudioEncoder.h"
void* AudioEncoder::run(void *ctx)
{
return ((AudioEncoder *)ctx)->execute();
}
static int frameCount = 0;
void* AudioEncoder::execute(void)
{
float *temp;
#ifdef DUMP_TEST
int fd = open("in.pcm", O_CREAT| O_RDWR, 0666);
#endif
int frameSize = 1024 * snd_pcm_format_width(SND_PCM_FORMAT_FLOAT) / 8 * 1;
av_new_packet(&pkt,size);
while (!Main::stopThread)
{
temp = (Main::fbAudio)->dequeue();
frame_buf = (uint8_t *) temp;
pFrame->data[0] = frame_buf;
pFrame->pts=frameCount*100;
frameCount++;
got_frame=0;
//Encode
ret = avcodec_encode_audio2(pCodecCtx, &pkt,pFrame, &got_frame);
if(ret < 0){
cerr << "Failed to encode!\n";
return NULL;
}
if (got_frame==1){
cout << "Encoded frame\n";
pkt.stream_index = audio_st->index;
#ifdef DUMP_TEST
write(fd, temp, frameSize);
ret = av_interleaved_write_frame(pFormatCtx, &pkt);
#endif
av_free_packet(&pkt);
}
delete temp;
}
#ifdef DUMP_TEST
close(fd);
#endif
Main::stopThread = true;
return NULL;
}
int AudioEncoder::flush_encoder(AVFormatContext *fmt_ctx,unsigned int stream_index){
int ret;
int got_frame;
AVPacket enc_pkt;
if (!(fmt_ctx->streams[stream_index]->codec->codec->capabilities &
CODEC_CAP_DELAY))
return 0;
while (1) {
enc_pkt.data = NULL;
enc_pkt.size = 0;
av_init_packet(&enc_pkt);
ret = avcodec_encode_audio2 (fmt_ctx->streams[stream_index]->codec, &enc_pkt,
NULL, &got_frame);
av_frame_free(NULL);
if (ret < 0)
break;
if (!got_frame){
ret=0;
break;
}
printf("Flush Encoder: Succeed to encode 1 frame!\tsize:%5d\n",enc_pkt.size);
/* mux encoded frame */
#ifdef DUMP_TEST
ret = av_write_frame(fmt_ctx, &enc_pkt);
if (ret < 0)
break;
#endif
}
return ret;
}
void AudioEncoder::start(void)
{
pthread_t encoder;
pthread_create(&encoder, NULL, &AudioEncoder::run, this);
}
AudioEncoder::AudioEncoder() : out_file("test.aac")
{
got_frame = 0;
ret = 0;
size = 0;
av_register_all();
avcodec_register_all();
//Method 1.
pFormatCtx = avformat_alloc_context();
fmt = av_guess_format(NULL, out_file, NULL);
pFormatCtx->oformat = fmt;
#ifdef DUMP_TEST
if (avio_open(&pFormatCtx->pb,out_file, AVIO_FLAG_READ_WRITE) < 0){
cerr << "Failed to open output file!\n";
return;
}
#endif
audio_st = avformat_new_stream(pFormatCtx, 0);
if (audio_st==NULL){
return;
}
pCodecCtx = audio_st->codec;
pCodecCtx->codec_id = fmt->audio_codec;
pCodecCtx->codec_type = AVMEDIA_TYPE_AUDIO;
pCodecCtx->sample_fmt = AV_SAMPLE_FMT_FLTP;
pCodecCtx->sample_rate= 8000;
pCodecCtx->channel_layout=AV_CH_LAYOUT_MONO;
pCodecCtx->channels = av_get_channel_layout_nb_channels(pCodecCtx->channel_layout);
pCodecCtx->bit_rate = 64000;
#ifdef DUMP_TEST
//Show some information
av_dump_format(pFormatCtx, 0, out_file, 1);
#endif
pCodec = avcodec_find_encoder(pCodecCtx->codec_id);
if (!pCodec){
printf("Can not find encoder!\n");
return;
}
if (avcodec_open2(pCodecCtx, pCodec,NULL) < 0){
printf("Failed to open encoder!\n");
return;
}
pFrame = av_frame_alloc();
pFrame->nb_samples= pCodecCtx->frame_size;
pFrame->format= pCodecCtx->sample_fmt;
size = av_samples_get_buffer_size(NULL, pCodecCtx->channels,pCodecCtx->frame_size,pCodecCtx->sample_fmt, 1);
frame_buf = (uint8_t *)av_malloc(size);
avcodec_fill_audio_frame(pFrame, pCodecCtx->channels, pCodecCtx->sample_fmt,(const uint8_t*)frame_buf, size, 1);
//Write Header
#ifdef DUMP_TEST
avformat_write_header(pFormatCtx,NULL);
#endif
}
AudioEncoder::~AudioEncoder()
{
//Flush Encoder
ret = flush_encoder(pFormatCtx,0);
if (ret < 0) {
cerr << "Flushing encoder failed\n";
return;
}
#ifdef DUMP_TEST
//Write Trailer
av_write_trailer(pFormatCtx);
#endif
//Clean
if (audio_st){
avcodec_close(audio_st->codec);
av_free(pFrame);
av_free(frame_buf);
}
avio_close(pFormatCtx->pb);
avformat_free_context(pFormatCtx);
}

Multithread decoding of Video PID of Mpeg2Ts using FFMPEG

I'm working on an app in VC++ to display video frames of a video Pid of mpeg2ts stream using FFMPEG and need to do the same, for other mpeg2stream simultaneously by using multi thread process,my source code is:
int main (int argc, char* argv[])
{
av_register_all();
avformat_network_init();
pFormatCtx = avformat_alloc_context();
if(avformat_open_input(&pFormatCtx,filepath,NULL,NULL)!=0){
printf("Couldn't open input stream.\n");
return -1;
}
if(avformat_find_stream_info(pFormatCtx,NULL)<0){
printf("Couldn't find stream information.\n");
return -1;
}
videoindex=-1;
for(i=0; i<pFormatCtx->nb_streams; i++)
if(pFormatCtx->streams[i]->codec->codec_type==AVMEDIA_TYPE_VIDEO){
videoindex=i;
break;
}
if(videoindex==-1){
printf("Didn't find a video stream.\n");
return -1;
}
pCodecCtx=pFormatCtx->streams[videoindex]->codec;
pCodec=avcodec_find_decoder(pCodecCtx->codec_id);
if(pCodec==NULL){
printf("Codec not found.\n");
return -1;
}
if(avcodec_open2(pCodecCtx, pCodec,NULL)<0){
printf("Could not open codec.\n");
return -1;
}
pFrame=av_frame_alloc();
pFrameYUV=av_frame_alloc();
out_buffer=(uint8_t *)av_malloc(avpicture_get_size(PIX_FMT_YUV420P, pCodecCtx->width, pCodecCtx->height));
avpicture_fill((AVPicture *)pFrameYUV, out_buffer, PIX_FMT_YUV420P, pCodecCtx->width, pCodecCtx->height);
packet=(AVPacket *)av_malloc(sizeof(AVPacket));
//Output Info-----------------------------
printf("--------------- File Information ----------------\n");
av_dump_format(pFormatCtx,0,filepath,0);
printf("-------------------------------------------------\n");
img_convert_ctx = sws_getContext(pCodecCtx->width, pCodecCtx->height, pCodecCtx->pix_fmt,
pCodecCtx->width, pCodecCtx->height, PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL);
#if OUTPUT_YUV420P
fp_yuv=fopen("output.yuv","wb+");
#endif
if(SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER)) {
printf( "Could not initialize SDL - %s\n", SDL_GetError());
return -1;
}
screen_w = pCodecCtx->width;
screen_h = pCodecCtx->height;
//SDL 2.0 Support for multiple windows
screen = SDL_CreateWindow("Simplest ffmpeg player's Window", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED,
screen_w, screen_h, SDL_WINDOW_OPENGL);
if(!screen) {
printf("SDL: could not create window - exiting:%s\n",SDL_GetError());
return -1;
}
sdlRenderer = SDL_CreateRenderer(screen, -1, 0);
//IYUV: Y + U + V (3 planes)
//YV12: Y + V + U (3 planes)
sdlTexture = SDL_CreateTexture(sdlRenderer, SDL_PIXELFORMAT_IYUV, SDL_TEXTUREACCESS_STREAMING,pCodecCtx->width,pCodecCtx->height);
sdlRect.x=0;
sdlRect.y=0;
sdlRect.w=screen_w;
sdlRect.h=screen_h;
//SDL End----------------------
BYTE buffer [4] ;
int nSize = 0 ;
int nByteCnt = 0 ;
int nPreviuosPos = 0 ;
mpgfile = fopen ("D:\\00_Projects\\Farzan II\\SampleData\\Yahsat1996V_N_PID(2101).pes", "rb");
while(av_read_frame(pFormatCtx, packet)>=0 /*&& nSize > 0*/)
{
if(packet->stream_index==videoindex)
{
ret = avcodec_decode_video2(pCodecCtx, pFrame, &got_picture, packet);
if(ret < 0)
{
printf("Decode Error.\n");
return -1;
}
if(got_picture)
{
sws_scale(img_convert_ctx, (const uint8_t* const*)pFrame->data, pFrame->linesize, 0, pCodecCtx->height,
pFrameYUV->data, pFrameYUV->linesize);
#if OUTPUT_YUV420P
y_size=pCodecCtx->width*pCodecCtx->height;
fwrite(pFrameYUV->data[0],1,y_size,fp_yuv); //Y
fwrite(pFrameYUV->data[1],1,y_size/4,fp_yuv); //U
fwrite(pFrameYUV->data[2],1,y_size/4,fp_yuv); //V
#endif
//SDL---------------------------
#if 0
SDL_UpdateTexture( sdlTexture, NULL, pFrameYUV->data[0], pFrameYUV->linesize[0] );
#else
SDL_UpdateYUVTexture(sdlTexture, &sdlRect,
pFrameYUV->data[0], pFrameYUV->linesize[0],
pFrameYUV->data[1], pFrameYUV->linesize[1],
pFrameYUV->data[2], pFrameYUV->linesize[2]);
#endif
SDL_RenderClear( sdlRenderer );
SDL_RenderCopy( sdlRenderer, sdlTexture, NULL, &sdlRect);
SDL_RenderPresent( sdlRenderer );
//SDL End-----------------------
//Delay 40ms
SDL_Delay(40);
}
}
av_free_packet(packet);
}
//flush decoder
//FIX: Flush Frames remained in Codec
while (1) {
ret = avcodec_decode_video2(pCodecCtx, pFrame, &got_picture, packet);
if (ret < 0)
break;
if (!got_picture)
break;
sws_scale(img_convert_ctx, (const uint8_t* const*)pFrame->data, pFrame->linesize, 0, pCodecCtx->height,
pFrameYUV->data, pFrameYUV->linesize);
#if OUTPUT_YUV420P
int y_size=pCodecCtx->width*pCodecCtx->height;
fwrite(pFrameYUV->data[0],1,y_size,fp_yuv); //Y
fwrite(pFrameYUV->data[1],1,y_size/4,fp_yuv); //U
fwrite(pFrameYUV->data[2],1,y_size/4,fp_yuv); //V
#endif
//SDL---------------------------
SDL_UpdateTexture( sdlTexture, &sdlRect, pFrameYUV->data[0], pFrameYUV->linesize[0] );
SDL_RenderClear( sdlRenderer );
SDL_RenderCopy( sdlRenderer, sdlTexture, NULL, &sdlRect);
SDL_RenderPresent( sdlRenderer );
//SDL End-----------------------
//Delay 40ms
SDL_Delay(40);
}
sws_freeContext(img_convert_ctx);
#if OUTPUT_YUV420P
fclose(fp_yuv);
#endif
SDL_Quit();
av_frame_free(&pFrameYUV);
av_frame_free(&pFrame);
avcodec_close(pCodecCtx);
avformat_close_input(&pFormatCtx);
return 0;
}
it works well when i call it in One thread but,after calling this function in multi thread ,the error of access violation occurred , is there anyone to guide me to solution?

encoding direcshow frame buffers by using libavcodec

I am trying to encode a stream buffer of frames grabbed by ISampleGrabber(directshow) by using libavcodec. After encoding those frame I am writing it into a file. But after completion file contains only green frames.
hers is code for grabbing frames and encoding it...
void DSGrabberCallback::initFFMpeg(){
const char* filename="G:/test1.mpg";
avcodec_register_all();
printf("Encode video file %s\n", filename);
AVCodecID codec_id=AV_CODEC_ID_MPEG2VIDEO;
codec = avcodec_find_encoder(codec_id);
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate video codec context\n");
}
c->bit_rate = 4000000;
c->width = 320;
c->height = 240;
AVRational test;
test.den=25;
test.num=1;
c->time_base= test;
c->gop_size = 10;
//c->max_b_frames=1;
c->pix_fmt = AV_PIX_FMT_YUV420P;
if(codec_id == AV_CODEC_ID_H264)
av_opt_set(c->priv_data, "preset", "slow", 0);
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
}
f = fopen(filename, "wb");
if (!f) {
fprintf(stderr, "Could not open %s\n", filename);
}
picture = alloc_picture(c->pix_fmt, c->width, c->height);
/*picture->format = c->pix_fmt;
picture->width = c->width;
picture->height = c->height;*/
av_init_packet(&pkt);
}
void DSGrabberCallback::encodeFrame(unsigned char *frame,ULONG size){
std::cout<<"called.....";
pkt.data = NULL;
pkt.size = 0;
picture->data[0]=frame;
fflush(stdout);
picture->pts=counter;
ret = avcodec_encode_video2(c, &pkt, picture, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding frame\n");
}
if (got_output) {
printf("Write frame %3d (size=%5d)\n", counter, pkt.size);
fwrite(pkt.data, 1, pkt.size, f);
av_free_packet(&pkt);
}
}
STDMETHODIMP DSGrabberCallback::SampleCB(double time, IMediaSample* sample)
{
BYTE* data = NULL;
ULONG length = 0;
m_bytes=NULL;
counter=counter+1;
if(FAILED(sample->GetPointer(&data)))
{
return E_FAIL;
}
length = sample->GetActualDataLength();
if(length == 0)
{
return S_OK;
}
if(!m_bytes || m_bytesLength < length)
{
if(m_bytes)
{
delete[] m_bytes;
}
m_bytes = new unsigned char[length];
m_bytesLength = length;
}
if(true)
{
for(size_t row = 0 ; row < 480 ; row++)
{
memcpy((m_bytes + row * 640 * 2), data + (480 - 1 - row) * 640 * 2,
640 * 2);
}
}
std::cout<<"hiiiiiiiiiiiiiiiiiiiiiiii";
// memcpy(m_bytes, data, length);
// std::cout<<"called............... "<<m_bytes<<"\n";
if(counter<500){
encodeFrame(m_bytes,length);
}else{
fwrite(endcode, 1, sizeof(endcode), f);
fclose(f);
avcodec_close(c);
av_free(c);
av_freep(&picture->data[0]);
avcodec_free_frame(&picture);
printf("\n");
exit(1);
}
//rtp.sendRTP(data,length);
//sample->Release();
//printf("Sample received: %p %u\n", data, length);
return S_OK;
}
can anyone tell me where is the problem.

Now working fine. Actually I forgot to convert the image buffer into YUV420P format. I have added some code for scaling the buffer into YUV format and everything is fine now. Thank you Wimmel and Roman R.

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Output RTSP stream with ffmpeg - visual-c++

Related

Audio Recording and Playback in C : problem with audio gain

ffmpeg record video plays too fast

FFMPEG AAC encoder issue

Multithread decoding of Video PID of Mpeg2Ts using FFMPEG

encoding direcshow frame buffers by using libavcodec

Categories

Resources