continuous record/recognize audio with pocketsphinx/ffmpeg

continuous record/recognize audio with pocketsphinx/ffmpeg - audio

As the title already says, I want to continuously record raw audio through my microphone.
So the idea was running a simple C program in the background as service that would create chunks of audio and send those files through the sphinx speech recognition.
After that I can do some processing with the recognized words.
The problem is the (continuous) recognition. I can't just record audio chunks that each contain 10 seconds of what I've said, because an utterance may span two chunks (e.g. chunk[33] and chunk[34] belong together), and then sphinx would output something like:
recognized chunk[33] -> ["enable light"]
recognized chunk[34] -> ["5 with 50 percent"]
Another approach would be to record audio continuously, but then I can't process big audio files with sphinx.
I'm using the basic example from pocketsphinx:
#include <pocketsphinx.h>
/*
 * Decode a single raw audio file with PocketSphinx and print the hypothesis.
 *
 * The input is read in blocks of 512 16-bit samples and fed to the decoder
 * as one utterance. Returns 0 on success and -1 if the configuration, the
 * decoder, the input file or the utterance could not be set up.
 */
int main(int argc, char *argv[])
{
    static const char *const input_path = "audiochunk_33.raw";
    ps_decoder_t *ps;
    cmd_ln_t *config;
    FILE *fh;
    char const *hyp;
    int16 buf[512];
    int32 score;
    size_t nsamp;

    (void)argc;
    (void)argv;

    config = cmd_ln_init(NULL, ps_args(), TRUE,
                         "-hmm", MODELDIR "/en-us/en-us",
                         "-lm", MODELDIR "/en-us/en-us.lm.bin",
                         "-dict", MODELDIR "/en-us/cmudict-en-us.dict",
                         NULL);
    if (config == NULL) {
        fprintf(stderr, "Failed to create config object, see log for details\n");
        return -1;
    }

    ps = ps_init(config);
    if (ps == NULL) {
        fprintf(stderr, "Failed to create recognizer, see log for details\n");
        cmd_ln_free_r(config);
        return -1;
    }

    fh = fopen(input_path, "rb");
    if (fh == NULL) {
        /* Report the file that was actually opened. */
        fprintf(stderr, "Unable to open input file %s\n", input_path);
        ps_free(ps);
        cmd_ln_free_r(config);
        return -1;
    }

    if (ps_start_utt(ps) < 0) {
        fprintf(stderr, "Failed to start utterance\n");
        fclose(fh);
        ps_free(ps);
        cmd_ln_free_r(config);
        return -1;
    }

    /* Loop on the fread() result instead of feof(): feof() only becomes true
     * after a read has already failed, which would feed an empty block. */
    while ((nsamp = fread(buf, sizeof(buf[0]), sizeof(buf) / sizeof(buf[0]), fh)) > 0) {
        ps_process_raw(ps, buf, nsamp, FALSE, FALSE);
    }
    ps_end_utt(ps);

    /* ps_get_hyp() may return NULL; never hand NULL to %s. */
    hyp = ps_get_hyp(ps, &score);
    printf("Recognized: %s\n", hyp != NULL ? hyp : "(no hypothesis)");

    fclose(fh);
    ps_free(ps);
    cmd_ln_free_r(config);
    return 0;
}
And here is a basic example using ffmpeg to create a simple audio file/chunk:
#include <stdio.h>
#include <stdint.h>
#include <math.h>
#define N 44100
/*
 * Generate one second of a 1 kHz sine wave (16-bit mono, 44.1 kHz) and pipe
 * it to an ffmpeg child process, which writes it to beep.wav.
 *
 * Returns 0 on success, 1 if the pipe could not be opened, written or closed.
 */
int main(void)
{
    int16_t buf[N] = {0};        /* sample buffer */
    const double Fs = 44100.0;   /* sampling frequency */
    int n;                       /* buffer index */
    int failed = 0;
    FILE *pipeout;

    /* Generate 1 second of audio data - it's just a 1 kHz sine wave */
    for (n = 0; n < N; ++n)
        buf[n] = (int16_t)(16383.0 * sin(n * 1000.0 * 2.0 * M_PI / Fs));

    /* Pipe the audio data to ffmpeg, which writes it to a wav file */
    pipeout = popen("ffmpeg -y -f s16le -ar 44100 -ac 1 -i - beep.wav", "w");
    if (pipeout == NULL) {
        perror("popen");
        return 1;
    }

    if (fwrite(buf, sizeof(buf[0]), N, pipeout) != (size_t)N) {
        perror("fwrite");
        failed = 1;
    }

    /* pclose() waits for ffmpeg to finish; -1 means the wait itself failed. */
    if (pclose(pipeout) == -1) {
        perror("pclose");
        failed = 1;
    }

    return failed;
}
BR
Michael

Related

PSET 3 for CS50 - Recover.c - My JPEG files are recovered but they are all empty

So I am currently attempting recover.c from the cs50 pset3 and I have recovered all 49 jpeg files. However, all these jpeg files are empty (with a grey and white grid). Could someone please explain where my code went wrong? I tried check50 to see if my code was correct but it said the recovered images do not match.
I changed my "w" to "wb" in my fopen function too but that didn't seem to work either.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stdbool.h>
/*
 * Recover JPEG images from a raw memory-card image.
 *
 * The card is read in 512-byte blocks. A block that starts with a JPEG
 * signature begins a new output file (000.jpg, 001.jpg, ...); every block
 * read after that, up to the next signature, belongs to that same file.
 *
 * Returns 0 on success, 1 on wrong usage, 2 if the card image cannot be
 * opened and 3 if an output file cannot be created.
 */
int main(int argc, char *argv[])
{
    /* a data type that can store a byte */
    typedef uint8_t BYTE;

    /* Checking to see if there is only one command line argument */
    if (argc != 2)
    {
        fprintf(stderr, "Usage: ./recover image\n" );
        return 1;
    }

    /* Open the card image in binary mode */
    char *infile = argv[1];
    FILE *memory = fopen(infile, "rb");
    if (memory == NULL)
    {
        fprintf(stderr, "Could not open %s.\n", infile);
        return 2;
    }

    /* One card block */
    BYTE buffer[512] = {0};
    /* the number of JPEG files found */
    int numJPEGfile = 0;
    /* Currently open output file; NULL until the first signature is seen.
     * Declared here so it stays open across iterations of the loop. */
    FILE *img = NULL;
    /* Output filename; sized generously so snprintf never truncates */
    char filename[16];

    /* Repeating until the end of card */
    while (fread(buffer, 512, 1, memory) == 1)
    {
        /* Start of a new JPEG? */
        if (buffer[0] == 0xff && buffer[1] == 0xd8 && buffer[2] == 0xff && (buffer[3] & 0xf0) == 0xe0)
        {
            /* Finish the previous image before starting the next one */
            if (img != NULL)
            {
                fclose(img);
            }

            snprintf(filename, sizeof(filename), "%03i.jpg", numJPEGfile);
            numJPEGfile += 1;

            img = fopen(filename, "wb");
            if (img == NULL)
            {
                fprintf(stderr, "Could not create %s.\n", filename);
                fclose(memory);
                return 3;
            }
        }

        /* Every block after the first signature belongs to the open JPEG.
         * Closing the file right after its first block leaves each image
         * truncated to 512 bytes. */
        if (img != NULL)
        {
            fwrite(buffer, 512, 1, img);
        }
    }

    /* Close any remaining files */
    if (img != NULL)
    {
        fclose(img);
    }
    fclose(memory);
    return 0;
}

If you run the below command in the terminal, how large are the jpg-files you recovered? I think this will give you a hint for solving this pset.
ls -l

FFmpeg leak while reading image files

While reading image files using a recent version of FFmpeg I'm encountering a memory leak I'm having trouble tracking down.
It seems that after filling the AVFrame with avcodec_send_packet and avcodec_receive_frame, my call to av_frame_free is not actually deallocating the AVBuffer objects within the frame. The only thing I'm not freeing is the AVCodecContext. If I try to do that, I get a crash.
I've created this sample program, it is about as simple as I can get it. This will keep opening, reading and then closing the same image file in a loop. On my system this leaks memory at an alarming rate.
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
int main(int argc, char **argv) {
av_register_all();
while(1) {
AVFormatContext *fmtCtx = NULL;
if (avformat_open_input(&fmtCtx, "/path/to/test.jpg", NULL, NULL) == 0) {
if (avformat_find_stream_info(fmtCtx, NULL) >= 0) {
for (unsigned int i = 0u; i < fmtCtx -> nb_streams; ++i) {
AVStream *stream = fmtCtx -> streams[i];
AVCodecContext *codecCtx = stream -> codec;
AVCodec *codec = avcodec_find_decoder(codecCtx -> codec_id);
if (avcodec_open2(codecCtx, codec, NULL) == 0) {
AVPacket packet;
if (av_read_frame(fmtCtx, &packet) >= 0) {
if (avcodec_send_packet(codecCtx, &packet) == 0) {
AVFrame *frame = av_frame_alloc();
avcodec_receive_frame(codecCtx, frame);
av_frame_free(&frame);
}
}
av_packet_unref(&packet);
}
}
}
avformat_close_input(&fmtCtx);
}
}
return 0;
}

The solution is to create a copy of the AVCodecContext that was automatically created when the file was opened and use this copy in avcodec_open2. This allows for this copy to be deleted with avcodec_free_context.
With recent versions of FFmpeg, avcodec_copy_context has been deprecated and replaced with AVCodecParameters. Using the following snippet in the sample program from the question plugs the leak:
// Replacement for the codec-context setup in the question's sample program:
// decode through a context this code allocates itself, so it can be freed.
// NOTE(review): none of the return values below are checked.
AVCodecParameters *param = avcodec_parameters_alloc();
AVCodecContext *codecCtx = avcodec_alloc_context3(NULL);
// The decoder is looked up from the codec id of the stream's own context.
AVCodec *codec = avcodec_find_decoder(stream -> codec -> codec_id);
// Copy the stream's settings into the new context via the parameter set.
avcodec_parameters_from_context(param, stream -> codec);
avcodec_parameters_to_context(codecCtx, param);
// The intermediate parameter set is released once it has been copied.
avcodec_parameters_free(&param);
[...]
// Free the context allocated above once decoding is done.
avcodec_free_context(&codecCtx);

C++ FFmpeg distorted sound when converting audio

I'm using the FFmpeg library to generate MP4 files containing audio from various files, such as MP3, WAV, OGG, but I'm having some troubles (I'm also putting video in there, but for simplicity's sake I'm omitting that for this question, since I've got that working). My current code opens an audio file, decodes the content and converts it into the MP4 container and finally writes it into the destination file as interleaved frames.
It works perfectly for most MP3 files, but when inputting WAV or OGG, the audio in the resulting MP4 is slightly distorted and often plays at the wrong speed (up to many times faster or slower).
I've looked at countless of examples of using the converting functions (swr_convert), but I can't seem to get rid of the noise in the exported audio.
Here's how I add an audio stream to the MP4 (outContext is the AVFormatContext for the output file):
// Add an audio stream to the output container and open its encoder.
// The encoder is the default audio codec of the output format.
audioCodec = avcodec_find_encoder(outContext->oformat->audio_codec);
if (!audioCodec)
die("Could not find audio encoder!");
// Start stream
audioStream = avformat_new_stream(outContext, audioCodec);
if (!audioStream)
die("Could not allocate audio stream!");
// The encoder is configured through the context owned by the new stream.
audioCodecContext = audioStream->codec;
audioStream->id = 1;
// Setup: fixed output parameters (16-bit samples, 128 kbit/s, 44.1 kHz, stereo)
// NOTE(review): the sample format is hard-coded rather than taken from the
// formats the chosen encoder reports - confirm the encoder accepts S16.
audioCodecContext->sample_fmt = AV_SAMPLE_FMT_S16;
audioCodecContext->bit_rate = 128000;
audioCodecContext->sample_rate = 44100;
audioCodecContext->channels = 2;
audioCodecContext->channel_layout = AV_CH_LAYOUT_STEREO;
// Open the codec
if (avcodec_open2(audioCodecContext, audioCodec, NULL) < 0)
die("Could not open audio codec");
And to open a sound file from MP3/WAV/OGG (from the filename variable)...
// Open the input sound file, open a decoder for its best audio stream and
// configure a resampler from the decoder's format to the encoder's format.
// Create context
formatContext = avformat_alloc_context();
if (avformat_open_input(&formatContext, filename, NULL, NULL)<0)
die("Could not open file");
// Find info
if (avformat_find_stream_info(formatContext, 0)<0)
die("Could not find file info");
av_dump_format(formatContext, 0, filename, false);
// Find audio stream
streamId = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
if (streamId < 0)
die("Could not find Audio Stream");
// The decoder uses the context owned by the selected input stream.
codecContext = formatContext->streams[streamId]->codec;
// Find decoder
codec = avcodec_find_decoder(codecContext->codec_id);
if (codec == NULL)
die("cannot find codec!");
// Open codec
// NOTE(review): this branch is taken when opening the decoder fails, but the
// message says the codec was not found.
if (avcodec_open2(codecContext, codec, 0)<0)
die("Codec cannot be found");
// Set up resample context
swrContext = swr_alloc();
if (!swrContext)
die("Failed to alloc swr context");
// Input side: taken from the decoder context.
// NOTE(review): presumably channel_layout can be unset for some inputs - confirm
// before relying on it here.
av_opt_set_int(swrContext, "in_channel_count", codecContext->channels, 0);
av_opt_set_int(swrContext, "in_channel_layout", codecContext->channel_layout, 0);
av_opt_set_int(swrContext, "in_sample_rate", codecContext->sample_rate, 0);
av_opt_set_sample_fmt(swrContext, "in_sample_fmt", codecContext->sample_fmt, 0);
// Output side: taken from the encoder context.
av_opt_set_int(swrContext, "out_channel_count", audioCodecContext->channels, 0);
av_opt_set_int(swrContext, "out_channel_layout", audioCodecContext->channel_layout, 0);
av_opt_set_int(swrContext, "out_sample_rate", audioCodecContext->sample_rate, 0);
av_opt_set_sample_fmt(swrContext, "out_sample_fmt", audioCodecContext->sample_fmt, 0);
// swr_init's result is treated as a failure when it is non-zero.
if (swr_init(swrContext))
die("Failed to init swr context");
Finally, to decode+convert+encode...
// Allocate and init re-usable frames
audioFrameDecoded = av_frame_alloc();
if (!audioFrameDecoded)
die("Could not allocate audio frame");
audioFrameDecoded->format = fileCodecContext->sample_fmt;
audioFrameDecoded->channel_layout = fileCodecContext->channel_layout;
audioFrameDecoded->channels = fileCodecContext->channels;
audioFrameDecoded->sample_rate = fileCodecContext->sample_rate;
audioFrameConverted = av_frame_alloc();
if (!audioFrameConverted)
die("Could not allocate audio frame");
audioFrameConverted->nb_samples = audioCodecContext->frame_size;
audioFrameConverted->format = audioCodecContext->sample_fmt;
audioFrameConverted->channel_layout = audioCodecContext->channel_layout;
audioFrameConverted->channels = audioCodecContext->channels;
audioFrameConverted->sample_rate = audioCodecContext->sample_rate;
AVPacket inPacket;
av_init_packet(&inPacket);
inPacket.data = NULL;
inPacket.size = 0;
int frameFinished = 0;
while (av_read_frame(formatContext, &inPacket) >= 0) {
if (inPacket.stream_index == streamId) {
int len = avcodec_decode_audio4(fileCodecContext, audioFrameDecoded, &frameFinished, &inPacket);
if (frameFinished) {
// Convert
uint8_t *convertedData=NULL;
if (av_samples_alloc(&convertedData,
NULL,
audioCodecContext->channels,
audioFrameConverted->nb_samples,
audioCodecContext->sample_fmt, 0) < 0)
die("Could not allocate samples");
int outSamples = swr_convert(swrContext,
&convertedData,
audioFrameConverted->nb_samples,
(const uint8_t **)audioFrameDecoded->data,
audioFrameDecoded->nb_samples);
if (outSamples < 0)
die("Could not convert");
size_t buffer_size = av_samples_get_buffer_size(NULL,
audioCodecContext->channels,
audioFrameConverted->nb_samples,
audioCodecContext->sample_fmt,
0);
if (buffer_size < 0)
die("Invalid buffer size");
if (avcodec_fill_audio_frame(audioFrameConverted,
audioCodecContext->channels,
audioCodecContext->sample_fmt,
convertedData,
buffer_size,
0) < 0)
die("Could not fill frame");
AVPacket outPacket;
av_init_packet(&outPacket);
outPacket.data = NULL;
outPacket.size = 0;
if (avcodec_encode_audio2(audioCodecContext, &outPacket, audioFrameConverted, &frameFinished) < 0)
die("Error encoding audio frame");
if (frameFinished) {
outPacket.stream_index = audioStream->index;
if (av_interleaved_write_frame(outContext, &outPacket) != 0)
die("Error while writing audio frame");
av_free_packet(&outPacket);
}
}
}
}
av_frame_free(&audioFrameConverted);
av_frame_free(&audioFrameDecoded);
av_free_packet(&inPacket);
I have also tried setting appropriate pts values for outgoing frames, but that doesn't seem to affect the sound quality at all.
I'm also unsure how/if I should be allocating the converted data, can av_samples_alloc be used for this? What about avcodec_fill_audio_frame? Am I on the right track?
Any input is appreciated (I can also send the exported MP4s if necessary, if you want to hear the distortion).

if (avcodec_encode_audio2(audioCodecContext, &outPacket, audioFrameConverted, &frameFinished) < 0)
die("Error encoding audio frame");
You seem to be assuming that the encoder will eat all submitted samples - it doesn't. It also doesn't cache them internally. It will eat a specific number of samples (AVCodecContext.frame_size), and the rest should be resubmitted in the next call to avcodec_encode_audio2().
[edit]
ok, so your edited code is better, but not there yet. You're still assuming the decoder will output at least frame_size samples for each call to avcodec_decode_audioN() (after resampling), which may not be the case. If that happens (and it does, for ogg), your avcodec_encode_audioN() call will encode an incomplete input buffer (because you say it's got frame_size samples, but it doesn't). Likewise, your code also doesn't deal with cases where the decoder outputs a number significantly bigger than frame_size (like 10*frame_size) expected by the encoder, in which case you'll get overruns - basically your 1:1 decode/encode mapping is the main source of your problem.
As a solution, consider the swrContext a FIFO, where you input all decoder samples, and loop over it until it's got less than frame_size samples left. I'll leave it up to you to learn how to deal with end-of-stream, because you'll need to flush cached samples out of the decoder (by calling avcodec_decode_audioN() with AVPacket where .data = NULL and .size = 0), flush the swrContext (by calling swr_convert() until it returns 0) as well as flush the encoder (by feeding it NULL AVFrames until it returns AVPacket with .size = 0). Right now you'll probably get an output file where the end is slightly truncated. That shouldn't be hard to figure out.
This code works for me for m4a/ogg/mp3 to m4a/aac conversion:
#include "libswresample/swresample.h"
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libavutil/opt.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Print an error message to stderr and terminate the program.
 *
 * str: message to print; a newline is appended. Taken as const because
 *      callers pass string literals.
 * Does not return: exits with status 1.
 */
static void die(const char *str) {
    fprintf(stderr, "%s\n", str);
    exit(1);
}
/*
 * Create an audio stream in the output container and configure its encoder
 * context (64 kbit/s, 44.1 kHz, stereo, first sample format the encoder
 * lists).
 *
 * oc:       output format context that receives the new stream.
 * codec_id: id of the audio encoder to use.
 * Returns the new stream. Terminates via die() if the encoder cannot be
 * found, lists no sample formats, or the stream cannot be created.
 */
static AVStream *add_audio_stream(AVFormatContext *oc, enum AVCodecID codec_id)
{
    AVCodecContext *c;
    AVStream *st;
    AVCodec *encoder = avcodec_find_encoder(codec_id);

    /* avcodec_find_encoder() returns NULL for unknown or disabled codecs;
     * the sample format lookup below would dereference it. */
    if (!encoder) die("avcodec_find_encoder");
    if (!encoder->sample_fmts) die("encoder reports no sample formats");

    st = avformat_new_stream(oc, encoder);
    if (!st) die("av_new_stream");

    c = st->codec;
    c->codec_id = codec_id;
    c->codec_type = AVMEDIA_TYPE_AUDIO;
    /* put sample parameters */
    c->bit_rate = 64000;
    c->sample_rate = 44100;
    c->channels = 2;
    c->sample_fmt = encoder->sample_fmts[0];
    c->channel_layout = AV_CH_LAYOUT_STEREO;
    // some formats want stream headers to be separate
    if (oc->oformat->flags & AVFMT_GLOBALHEADER)
        c->flags |= CODEC_FLAG_GLOBAL_HEADER;
    return st;
}
/*
 * Open the encoder of an output audio stream.
 *
 * oc: output format context (unused; kept for the existing call signature).
 * st: stream whose codec context is opened with "strict" set to
 *     "+experimental".
 * Terminates via die() if the encoder cannot be found or opened.
 */
static void open_audio(AVFormatContext *oc, AVStream *st)
{
    AVCodecContext *c = st->codec;
    AVCodec *codec;
    AVDictionary *dict = NULL;
    int res;

    (void)oc;

    /* find the audio encoder */
    codec = avcodec_find_encoder(c->codec_id);
    if (!codec) die("avcodec_find_encoder");

    /* open it */
    av_dict_set(&dict, "strict", "+experimental", 0);
    res = avcodec_open2(c, codec, &dict);
    /* avcodec_open2() leaves unconsumed options in the dictionary, so it
     * must be freed on both the success and the failure path. */
    av_dict_free(&dict);
    if (res < 0) die("avcodec_open");
}
/*
 * Transcode the best audio stream of <in> into the default audio codec of
 * the container guessed from <out>.
 *
 * Decoded samples are pushed into the resampler, which is used as a FIFO:
 * after each decoded frame, full encoder frames are pulled out, encoded and
 * written for as long as enough samples are buffered.
 *
 * NOTE: the decoder, resampler and encoder are not flushed at end of
 * stream, so the tail of the output may be slightly truncated.
 */
int main(int argc, char *argv[]) {
    av_register_all();
    if (argc != 3) {
        fprintf(stderr, "%s <in> <out>\n", argv[0]);
        exit(1);
    }

    AVCodecContext *fileCodecContext, *audioCodecContext;
    /* avformat_open_input() requires NULL or an allocated context. */
    AVFormatContext *formatContext = NULL, *outContext;
    AVStream *audioStream;
    SwrContext *swrContext;
    int streamId;

    // input file
    const char *file = argv[1];
    int res = avformat_open_input(&formatContext, file, NULL, NULL);
    if (res != 0) die("avformat_open_input");
    res = avformat_find_stream_info(formatContext, NULL);
    if (res < 0) die("avformat_find_stream_info");
    AVCodec *codec = NULL;
    res = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);
    if (res < 0) die("av_find_best_stream");
    streamId = res;
    /* Decode through a private copy so it can be closed and freed here. */
    fileCodecContext = avcodec_alloc_context3(codec);
    if (!fileCodecContext) die("avcodec_alloc_context3");
    res = avcodec_copy_context(fileCodecContext, formatContext->streams[streamId]->codec);
    if (res < 0) die("avcodec_copy_context");
    res = avcodec_open2(fileCodecContext, codec, NULL);
    if (res < 0) die("avcodec_open2");

    // output file
    const char *outfile = argv[2];
    AVOutputFormat *fmt = av_guess_format(NULL, outfile, NULL);
    if (!fmt) die("av_guess_format");
    outContext = avformat_alloc_context();
    if (!outContext) die("avformat_alloc_context");
    outContext->oformat = fmt;
    audioStream = add_audio_stream(outContext, fmt->audio_codec);
    open_audio(outContext, audioStream);
    res = avio_open2(&outContext->pb, outfile, AVIO_FLAG_WRITE, NULL, NULL);
    if (res < 0) die("url_fopen");
    res = avformat_write_header(outContext, NULL);
    if (res < 0) die("avformat_write_header");
    audioCodecContext = audioStream->codec;

    // resampling
    swrContext = swr_alloc();
    if (!swrContext) die("swr_alloc");
    av_opt_set_channel_layout(swrContext, "in_channel_layout", fileCodecContext->channel_layout, 0);
    av_opt_set_channel_layout(swrContext, "out_channel_layout", audioCodecContext->channel_layout, 0);
    av_opt_set_int(swrContext, "in_sample_rate", fileCodecContext->sample_rate, 0);
    av_opt_set_int(swrContext, "out_sample_rate", audioCodecContext->sample_rate, 0);
    av_opt_set_sample_fmt(swrContext, "in_sample_fmt", fileCodecContext->sample_fmt, 0);
    av_opt_set_sample_fmt(swrContext, "out_sample_fmt", audioCodecContext->sample_fmt, 0);
    res = swr_init(swrContext);
    if (res < 0) die("swr_init");

    // Allocate and init re-usable frames
    AVFrame *audioFrameDecoded = av_frame_alloc();
    if (!audioFrameDecoded)
        die("Could not allocate audio frame");
    audioFrameDecoded->format = fileCodecContext->sample_fmt;
    audioFrameDecoded->channel_layout = fileCodecContext->channel_layout;
    audioFrameDecoded->channels = fileCodecContext->channels;
    audioFrameDecoded->sample_rate = fileCodecContext->sample_rate;

    AVFrame *audioFrameConverted = av_frame_alloc();
    if (!audioFrameConverted) die("Could not allocate audio frame");
    audioFrameConverted->nb_samples = audioCodecContext->frame_size;
    audioFrameConverted->format = audioCodecContext->sample_fmt;
    audioFrameConverted->channel_layout = audioCodecContext->channel_layout;
    audioFrameConverted->channels = audioCodecContext->channels;
    audioFrameConverted->sample_rate = audioCodecContext->sample_rate;

    /* One encoder frame worth of output samples, allocated once and reused.
     * An array of plane pointers is needed because av_samples_alloc() and
     * swr_convert() write one pointer per plane for planar sample formats;
     * a single pointer would be overrun. The planes live in one contiguous
     * allocation that starts at convertedData[0]. */
    uint8_t *convertedData[AV_NUM_DATA_POINTERS] = { NULL };
    if (av_samples_alloc(convertedData,
                         NULL,
                         audioCodecContext->channels,
                         audioFrameConverted->nb_samples,
                         audioCodecContext->sample_fmt, 0) < 0)
        die("Could not allocate samples");
    /* Signed on purpose: a negative value signals an error. */
    int buffer_size = av_samples_get_buffer_size(NULL,
                                                 audioCodecContext->channels,
                                                 audioFrameConverted->nb_samples,
                                                 audioCodecContext->sample_fmt,
                                                 0);
    if (buffer_size < 0) die("Invalid buffer size");

    AVPacket inPacket;
    av_init_packet(&inPacket);
    inPacket.data = NULL;
    inPacket.size = 0;

    while (av_read_frame(formatContext, &inPacket) >= 0) {
        if (inPacket.stream_index == streamId) {
            int frameFinished = 0;
            int len = avcodec_decode_audio4(fileCodecContext, audioFrameDecoded, &frameFinished, &inPacket);
            if (len >= 0 && frameFinished) {
                /* Push the decoded samples into the resampler without
                 * pulling any output yet. */
                int outSamples = swr_convert(swrContext, NULL, 0,
                                             (const uint8_t **)audioFrameDecoded->data,
                                             audioFrameDecoded->nb_samples);
                if (outSamples < 0) die("Could not convert");

                /* Drain full encoder frames while enough samples are buffered. */
                for (;;) {
                    outSamples = swr_get_out_samples(swrContext, 0);
                    if (outSamples < audioCodecContext->frame_size * audioCodecContext->channels) break;

                    outSamples = swr_convert(swrContext,
                                             convertedData,
                                             audioFrameConverted->nb_samples, NULL, 0);
                    if (outSamples < 0) die("Could not convert");

                    if (avcodec_fill_audio_frame(audioFrameConverted,
                                                 audioCodecContext->channels,
                                                 audioCodecContext->sample_fmt,
                                                 convertedData[0],
                                                 buffer_size,
                                                 0) < 0)
                        die("Could not fill frame");

                    AVPacket outPacket;
                    int gotPacket = 0;
                    av_init_packet(&outPacket);
                    outPacket.data = NULL;
                    outPacket.size = 0;
                    if (avcodec_encode_audio2(audioCodecContext, &outPacket, audioFrameConverted, &gotPacket) < 0)
                        die("Error encoding audio frame");
                    if (gotPacket) {
                        outPacket.stream_index = audioStream->index;
                        if (av_interleaved_write_frame(outContext, &outPacket) != 0)
                            die("Error while writing audio frame");
                        av_free_packet(&outPacket);
                    }
                }
            }
        }
        /* Every packet returned by av_read_frame() must be released. */
        av_free_packet(&inPacket);
    }

    av_freep(&convertedData[0]);
    swr_close(swrContext);
    swr_free(&swrContext);
    av_frame_free(&audioFrameConverted);
    av_frame_free(&audioFrameDecoded);
    av_write_trailer(outContext);
    avio_close(outContext->pb);
    avcodec_close(audioCodecContext);
    avformat_free_context(outContext);
    avcodec_close(fileCodecContext);
    avcodec_free_context(&fileCodecContext);
    avformat_close_input(&formatContext);
    return 0;
}

I wanted to include a couple things I found when I was working with the above code.
I had one file get stuck in an infinite loop. The reason is that the file had a sample rate of 48000 and the code changes it to 44100. This caused the resampler to always have extra outSamples buffered, and swr_convert would not grab them. So I ended up changing add_audio_stream to match the input stream's sample rate.
c->sample_rate = fileCodecContext->sample_rate;
Also, I had to produce WAV files as my output, and the encoder reported a frame size of 0, so I had to choose a number myself; after a few tests I went with 32. I noticed that if I went too big (e.g. 128) I would get audio glitches.
if (audioFrameConverted->nb_samples <= 0) audioFrameConverted->nb_samples = 32; //wav files have a 0
Changed the if statement that breaks out of the loop to check nb_samples if frame_size is 0.
if ((outSamples < audioCodecContext->frame_size * audioCodecContext->channels) || audioCodecContext->frame_size==0 && (outSamples < audioFrameConverted->nb_samples * audioCodecContext->channels)) break; // see comments, thanks to #dajuric for fixing this
There was also a glitch when I was testing outputting to ogg files where the timestamp data was missing so the file wouldn't play correctly in vlc. There were a few lines I added that helped with that.
out_audioStream->time_base = in_audioStream->time_base; // entered before avio_open.
outPacket.dts = audioFrameDecoded->pkt_dts;//rest after avcodec_encode_audio2
outPacket.pts = audioFrameDecoded->pkt_pts;
av_packet_rescale_ts(&outPacket, in_audioStream->time_base, out_audioStream->time_base);
Variables might be a little different I converted the code to c#. Thought this might help someone.

Actually swr_convert won't work for that, try to use swr_convert_frame instead.

Encoding FLOAT PCM to OGG using libav

I am currently trying to convert a raw PCM Float buffer to an OGG encoded file. I tried several library to do the encoding process and I finally chose libavcodec.
What I precisely want to do is get the float buffer ([-1;1]) provided by my audio library and turn it to a char buffer of encoded ogg data.
I managed to encode the float buffer to a buffer of encoded MP2 with this (proof of concept) code:
// State shared between main(), which sets up the encoder, and
// myLibraryCallback(), which fills and encodes frames.
static AVCodec *codec;       // encoder looked up in main()
static AVCodecContext *c;    // encoder context
static AVPacket pkt;         // packet that receives each encoded frame
// Sample buffer the frame's data pointers are set to.
// NOTE(review): declared unsigned although the encoder is configured for
// AV_SAMPLE_FMT_S16 - confirm the intended signedness.
static uint16_t* samples;
static AVFrame* frame;       // frame handed to the encoder
// Write index into samples; reset to 0 after each encode.
static int frameEncoded;
FILE *file;                  // output file the encoded packets are written to
/*
 * Set up the MP2 encoder state used by myLibraryCallback(): open the output
 * file, open the encoder, and allocate one frame plus the sample buffer it
 * points to.
 *
 * Exits with status 1 if any step fails; returns 0 otherwise.
 */
int main(int argc, char *argv[])
{
    int ret;

    (void)argc;
    (void)argv;

    /* Encoded audio is binary data, so open in binary mode, and check the
     * result because the callback writes to this handle. */
    file = fopen("file.ogg", "wb");
    if (!file) {
        fprintf(stderr, "Could not open file.ogg for writing\n");
        exit(1);
    }

    avcodec_register_all();
    codec = avcodec_find_encoder(AV_CODEC_ID_MP2);
    if (!codec) {
        fprintf(stderr, "codec not found\n");
        exit(1);
    }

    /* Allocate the context for this codec so its defaults are applied. */
    c = avcodec_alloc_context3(codec);
    if (!c) {
        fprintf(stderr, "Could not allocate codec context\n");
        exit(1);
    }
    c->bit_rate = 256000;
    c->sample_rate = 44100;
    c->channels = 2;
    c->sample_fmt = AV_SAMPLE_FMT_S16;
    c->channel_layout = AV_CH_LAYOUT_STEREO;

    /* open it */
    if (avcodec_open2(c, codec, NULL) < 0) {
        fprintf(stderr, "Could not open codec\n");
        exit(1);
    }

    /* frame containing input raw audio */
    frame = av_frame_alloc();
    if (!frame) {
        fprintf(stderr, "Could not allocate audio frame\n");
        exit(1);
    }
    frame->nb_samples = c->frame_size;
    frame->format = c->sample_fmt;
    frame->channel_layout = c->channel_layout;

    /* the codec gives us the frame size, in samples,
     * we calculate the size of the samples buffer in bytes */
    int buffer_size = av_samples_get_buffer_size(NULL, c->channels, c->frame_size,
                                                 c->sample_fmt, 0);
    if (buffer_size < 0) {
        fprintf(stderr, "Could not get sample buffer size\n");
        exit(1);
    }
    samples = (uint16_t *)av_malloc(buffer_size);
    if (!samples) {
        fprintf(stderr, "Could not allocate %d bytes for samples buffer\n",
                buffer_size);
        exit(1);
    }

    /* setup the data pointers in the AVFrame */
    ret = avcodec_fill_audio_frame(frame, c->channels, c->sample_fmt,
                                   (const uint8_t*)samples, buffer_size, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not setup audio frame\n");
        exit(1);
    }
    return 0;
}
void myLibraryCallback(float *inbuffer, unsigned int length)
{
for(int j = 0; j < (2 * length); j++) {
if(frameEncoded >= (c->frame_size *2)) {
int avret, got_output;
av_init_packet(&pkt);
pkt.data = NULL; // packet data will be allocated by the encoder
pkt.size = 0;
avret = avcodec_encode_audio2(c, &pkt, frame, &got_output);
if (avret < 0) {
fprintf(stderr, "Error encoding audio frame\n");
exit(1);
}
if (got_output) {
fwrite(pkt.data, 1, pkt.size, file);
av_free_packet(&pkt);
}
frameEncoded = 0;
}
samples[frameEncoded] = inbuffer[j] * SHRT_MAX;
frameEncoded++;
}
}
The code is really simple, I initialize libavencode the usual way, then my audio library sends me processed PCM FLOAT [-1;1] interleaved at 44.1Khz and the number of floats (usually 1024) in the inbuffer for each channel (2 for stereo). So usually, inbuffer contains 2048 floats.
That was easy since I just needed here to convert my PCM to 16P, both interleaved. Moreover it is possible to code a 16P sample on a single char.
Now I would like to apply this to OGG which needs a sample format of AV_SAMPLE_FMT_FLTP.
Since my native format is AV_SAMPLE_FMT_FLT, it should only need some deinterleaving, which is really easy to do.
The points I don't get are:
How can you send a float buffer on a char buffer ? Do we treat them as-is (float* floatSamples = (float*) samples) ? If so, what means the sample number avcodec gives you ? Is it the number of floats or chars ?
How can you send data in two buffers (one for left, one for right) when avcodec_fill_audio_frame only takes a (uint8_t*) parameter and not a (uint8_t**) for multiple channels? Does it completely change the previous sample code?
I tried to find some answers myself and I have made a LOT of experiments so far, but I failed on these points. Since there is a huge lack of documentation on them, I would be very grateful if you had answers.
Thank you !

How to play and detect an object using captured video in background subtractor model?

Hello everyone! I am using OpenCV 2.4.2 for an object detection project. I tried using the BackgroundSubtractorMOG model.
However, I am not able to load a video file from my computer. When running in real time from the camera, the segmentation code below works fine.
I have implemented object detection using the frame differencing method. Now I want to segment the whole object from the background; the background is static. Can anybody help me with how to segment the object from a captured video using the code below, and also how to load a video file?
thank you.
#include "stdafx.h"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/contrib/contrib.hpp"
#include "conio.h"
#include "time.h"
#include "opencv/cvaux.hpp"
#include "opencv2/core/core.hpp"
#include "opencv2/calib3d/calib3d.hpp"
using namespace std;
using namespace cv;
int main(int argc, char** argv)
{
//IplImage* tmp_frame;
//std::string arg = argv[1];
//VideoCapture capture();
cv::VideoCapture cap;
/*CvCapture *cap =cvCaptureFromFile("S:\\offline object detection database\\SINGLE PERSON Database\\video4.avi");
if(!cap){
printf("Capture failure\n");
return -1;
}
IplImage* frame=0;
frame = cvQueryFrame(cap);
if(!frame)
return -1;*/
bool update_bg_model = true;
if( argc < 2 )
cap.open(0);
else
cap.open(std::string(argv[1]));
if( !cap.isOpened() )
{
printf("can not open camera or video file\n");
return -1;
}
Mat tmp_frame, bgmask;
cap >> tmp_frame;
if(!tmp_frame.data)
{
printf("can not read data from the video source\n");
return -1;
}
namedWindow("video", 1);
namedWindow("segmented", 1);
BackgroundSubtractorMOG bgsubtractor;
for(;;)
{
//double t = (double)cvGetTickCount();
cap >> tmp_frame;
if( !tmp_frame.data )
break;
bgsubtractor(tmp_frame, bgmask, update_bg_model ? -1 : 0);
//t = (double)cvGetTickCount() - t;
//printf( "%d. %.1f\n", fr, t/(cvGetTickFrequency()*1000.) );
imshow("video", tmp_frame);
imshow("segmented", bgmask);
char keycode = waitKey(30);
if( keycode == 27 ) break;
if( keycode == ' ' )
update_bg_model = !update_bg_model;
}
return 0;
}

The video loading in opencv works for me. To load a video you can try something like this. Once you have captured frame you either do processing in the loop or can call a separate function.
// Open the video file named by argv[1] and display it frame by frame.
// NOTE(review): argv[1] is used without checking that an argument was passed.
std::cout<<"Video File "<<argv[1]<<std::endl;
cv::VideoCapture input_video(argv[1]);
namedWindow("My_Win",1);
Mat cap_img;
// The loop ends when grab() reports that no further frame could be grabbed.
while(input_video.grab())
{
// Frames that cannot be retrieved are skipped rather than shown.
if(input_video.retrieve(cap_img))
{
imshow("My_Win", cap_img);
/* Once you have the image do all the processing here */
/* Or Call your image processing function */
waitKey(1);
}
}
Or you can do something like this:
// Open a fixed video file and display it frame by frame.
//
// Each frame is shown in "My_Win"; waitKey(0) blocks until a key is pressed,
// so the video advances one frame per key press.
// Returns -1 if the file cannot be opened, 0 once the last frame was shown.
int main(int argc, char*argv[])
{
    (void)argc;
    (void)argv;

    // String literals are const in C++.
    const char *my_file = "C:\\vid_an2\\desp_me.avi";
    std::cout<<"Video File "<<my_file<<std::endl;

    cv::VideoCapture input_video;
    if(!input_video.open(my_file))
    {
        // Nothing can be displayed without an open source.
        std::cout<<"Not able to Video file open "<<std::endl;
        return -1;
    }
    std::cout<<"Video file open "<<std::endl;

    namedWindow("My_Win",1);
    namedWindow("Segemented", 1);
    Mat cap_img;
    for(;;)
    {
        input_video >> cap_img;
        // An empty frame marks the end of the video; imshow must not be
        // given an empty image.
        if(cap_img.empty())
            break;
        imshow("My_Win", cap_img);
        waitKey(0);
    }
    return 0;
}

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

continuous record/recognize audio with pocketsphinx/ffmpeg - audio

Related

PSET 3 for CS50 - Recover.c - My JPEG files are recovered but they are all empty

FFmpeg leak while reading image files

C++ FFmpeg distorted sound when converting audio

Encoding FLOAT PCM to OGG using libav

How to play and detect an object using captured video in background subtractor model?

Categories

Resources