Capturing latency data with Core Audio

I wrote this code to create an audio file (sine.aif) using the Audio Toolbox framework.
I'm interested in capturing the data latency of this file without running it on my phone, just running it on the command line and capturing the time. Is this possible? I do not have much knowledge of Core Audio.
Here is the code:
#import <Foundation/Foundation.h>
#import <AudioToolbox/AudioToolbox.h>

#define SAMPLE_RATE 44100
#define DURATION 5.0
#define FILENAME_FORMAT @"%0.3f-sine.aif"

int main(int argc, const char * argv[]) {
    NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
    if (argc < 2) {
        printf("Usage: CAToneFileGenerator 261.526\n");
        return -1;
    }
    double hz = atof(argv[1]);
    assert(hz > 0);
    NSLog(@"generating %f hz tone", hz);
    NSString *fileName = [NSString stringWithFormat:FILENAME_FORMAT, hz];
    NSString *filePath = [[[NSFileManager defaultManager] currentDirectoryPath] stringByAppendingPathComponent:fileName];
    NSURL *fileURL = [NSURL fileURLWithPath:filePath];

    // Prepare the format
    AudioStreamBasicDescription asbd;
    memset(&asbd, 0, sizeof(asbd));
    asbd.mSampleRate = SAMPLE_RATE;
    asbd.mFormatID = kAudioFormatLinearPCM;
    asbd.mFormatFlags = kAudioFormatFlagIsBigEndian | kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked;
    asbd.mBitsPerChannel = 16;
    asbd.mChannelsPerFrame = 1;
    asbd.mFramesPerPacket = 1;
    asbd.mBytesPerFrame = 2;
    asbd.mBytesPerPacket = 2;

    // Set up the file
    AudioFileID audioFile;
    OSStatus audioErr = noErr;
    audioErr = AudioFileCreateWithURL((CFURLRef)fileURL, kAudioFileAIFFType, &asbd, kAudioFileFlags_EraseFile, &audioFile);
    assert(audioErr == noErr);

    // Start writing samples
    long maxSampleCount = SAMPLE_RATE * DURATION;
    //NSLog(@"maxSampleCount %ld", maxSampleCount);
    long sampleCount = 0;
    UInt32 bytesToWrite = 2;
    double wavelengthInSamples = SAMPLE_RATE / hz;
    NSLog(@"wavelengthInSamples %f", wavelengthInSamples);
    while (sampleCount < maxSampleCount) {
        for (int i = 0; i < wavelengthInSamples; i++) {
            // sine wave
            SInt16 sample = CFSwapInt16HostToBig((SInt16)(SHRT_MAX * sin(2 * M_PI * (i / wavelengthInSamples))));
            audioErr = AudioFileWriteBytes(audioFile, false, sampleCount * 2, &bytesToWrite, &sample);
            assert(audioErr == noErr);
            sampleCount++;
        }
    }
    audioErr = AudioFileClose(audioFile);
    assert(audioErr == noErr);
    NSLog(@"wrote %ld samples", sampleCount);
    [pool drain];
    return 0;
}
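If "latency" here means how long the tone generation and file write take (rather than playback latency, which only arises when the audio actually plays on a device), then yes, this is possible from the command line: either time the whole process from the shell, e.g. time ./CAToneFileGenerator 440, or bracket the write loop with host-clock timestamps. Below is a minimal sketch using CFAbsoluteTimeGetCurrent from CoreFoundation; wrapping the while loop is my assumption about what you want to measure:

// Sketch: time the sample-writing loop (uses the same variables as above).
CFAbsoluteTime start = CFAbsoluteTimeGetCurrent();
while (sampleCount < maxSampleCount) {
    // ... write samples exactly as in the loop above ...
}
CFAbsoluteTime elapsed = CFAbsoluteTimeGetCurrent() - start;
printf("wrote %ld samples in %f seconds (%f seconds of audio)\n",
       sampleCount, elapsed, sampleCount / (double)SAMPLE_RATE);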

Related

Audio Recording and Playback in C: problem with audio gain

The question essentially is: how do I correctly apply gain to an audio sample?
I'm programming on FreeBSD with OSS, but manipulating the volume of an audio sample is probably the same on other OSes and in other applications.
I'm studying the internals of other applications, like ecasound (in C++) and SoX (in C), but I don't know what's wrong when I read a sample and apply gain to it: it becomes distorted and noisy. My point is to understand why turning the volume down (gain less than 1) does not work.
I'm working with stereo 16-bit LE samples. Without applying gain, it works perfectly (recording and playback).
I thought I should convert an integer sample to float, multiply by a gain factor, and restore it to integer, but it is not working. And it seems to be the exact same approach SoX takes in src/vol.c, in the static function flow.
Below is my code (no additional libs used). The function playback is where I apply the gain.
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include "/usr/include/sys/soundcard.h"
#include <sys/ioctl.h>
#include <sys/time.h>
#include <sys/stat.h> //man 2 chmod
#include <signal.h>
#define DEBUG 1
#define log(msg) if (DEBUG) printf("[LOG] %s\n",msg)
#define err(msg) {printf("[ERR] %s\n",msg); exit(1); }
const char *device = "/dev/dsp3.1"; //Audio device
char *rawFile = "/tmp/raw-file.wav"; //Raw file to record and playback
int fragmentSize = 256;
int b_continue = 1;
void signalHandler(int sigNum){
log("Signal captured");
b_continue = 0;
}
void configDevice(int fdDsp){
int ossCapabilities = 0;
if(fdDsp == -1)
err("can't open device");
if( ioctl(fdDsp, SNDCTL_DSP_GETCAPS, &ossCapabilities) == -1)
err("unsupported: SNDCTL_DSP_GETCAPS");
/*
* http://www.opensound.com/pguide/audio2.html
*/
if(ossCapabilities & DSP_CAP_TRIGGER != DSP_CAP_TRIGGER){
err("Triggering of recording/playback is not possible with this OSS device.");
}
if(ossCapabilities & DSP_CAP_REALTIME != DSP_CAP_REALTIME){
err("No DSP_CAP_REALTIME.");
}
if(ioctl(fdDsp, SNDCTL_DSP_SETDUPLEX, &ossCapabilities) == -1)
err("can't SNDCTL_DSP_SETDUPLEX");
if(ossCapabilities & DSP_CAP_DUPLEX != DSP_CAP_DUPLEX)
err("can't DSP_CAP_DUPLEX");
int format = AFMT_S16_LE; //set format
if(ioctl(fdDsp, SNDCTL_DSP_SETFMT, &format ) == -1){
err("Error setting format.");
}
int channels = 1; //mono=0 stereo=1
if(ioctl(fdDsp, SNDCTL_DSP_STEREO, &channels ) == -1){
err("Error setting channels." );
}
// FREQUENCY RATE
int speed = 44100;
if(ioctl(fdDsp, SNDCTL_DSP_SPEED, &speed ) == -1){
err("Error setting speed.");
}
// FRAGMENT SIZE
if(ioctl(fdDsp, SNDCTL_DSP_SETBLKSIZE, &fragmentSize) == -1){ //usually 2048 bits
err("Cannot SNDCTL_DSP_SETBLKSIZE.");
}
}
void record(){
int fdDsp = open(device, O_RDONLY);
configDevice(fdDsp);
//create file for writing
const int fdOutput = open(rawFile, O_WRONLY | O_CREAT, S_IWUSR | S_IRUSR);
if(fdOutput ==-1)
err("can't open file to write");
log("Recording...");
do{
// Triggers recording
int enableBits = PCM_ENABLE_INPUT;
if(ioctl(fdDsp, SNDCTL_DSP_SETTRIGGER, &enableBits) == -1)
err("Can't record: SNDCTL_DSP_SETTRIGGER");
int *buf[fragmentSize];
read(fdDsp, buf, fragmentSize);
write(fdOutput, buf, fragmentSize);
} while(b_continue == 1);
close(fdOutput);
close(fdDsp);
}
void playback(){
log("Opening file:");
log(rawFile);
log("On device:");
log(device);
int fdDsp = open(device, O_WRONLY);
configDevice(fdDsp);
const int fdInput = open(rawFile, O_RDONLY);
if(fdInput ==-1)
err("can't open file");
log("Playing...");
int eof = 0;
do{
// TRIGGERs PLAYBACK
int enableBits = PCM_ENABLE_OUTPUT;
if(ioctl(fdDsp, SNDCTL_DSP_SETTRIGGER, &enableBits) == -1){
err("Cannot SNDCTL_DSP_SETTRIGGER.");
}
int buf[fragmentSize];
eof = read(fdInput, buf, fragmentSize); //bytes read or -1 if EOF
// audio processing:
for(int i=0;i<fragmentSize;i++){
// learning how to get left and right channels from buffer
int l = (buf)[i] & 0xffff;
int r = ((buf)[i] >> 16) & 0xffff ;
// FIXME: it is causing distortion:
float fl = l;
float fr = r;
fl *= 1.0;
fr *= 0.3; //if different than 1, sounds distorted and noisy
l = fl;
r = fr;
// OK: unite Left and Right channels again
int lr = (l ) | (r << 16);
// OK: other options to mix these two channels:
int lleft = l; //Just the left channel
int rright = (r << 16); //Just the right channel
int lmono = (l << 16) | l; //Left ch. on both channels
int rmono = (r << 16) | r; //Right ch. on both channels
// the output:
(buf)[i] = lr;
}
write(fdDsp, buf, fragmentSize);
if(b_continue == 0) break;
} while(eof > 0);
close(fdInput);
close(fdDsp);
}
int main(int argc, char *argv[])
{
signal(SIGINT, signalHandler);
log("Ctrl^C to stop recording/playback");
record();
b_continue = 1; playback();
log("Stopped.");
return 0;
}
UPDATE:
As pointed out by CL, I was using the wrong type, and the last parameter of read()/write() was greater than the size of the buffer.
So, on FreeBSD, I changed the buffer type to int16_t (short), defined in <stdint.h>.
Now I can correctly apply a gain as desired:
float fl = l;
float fr = r;
fl *= 1.0f;
fr *= 1.5f;
l = fl;
r = fr;
I'll accept CL's answer.
Now the audio processing loop works on one sample at a time (left and right channels interleaved).
Updated code:
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include "/usr/include/sys/soundcard.h"
#include <sys/ioctl.h>
#include <sys/time.h>
#include <sys/stat.h> //man 2 chmod
#include <signal.h>
#include <stdint.h> //has type int16_t (short)
#define DEBUG 1
#define log(msg) if (DEBUG) printf("[LOG] %s\n",msg)
#define err(msg) {printf("[ERR] %s\n",msg); exit(1); }
const char *device = "/dev/dsp3.1"; //Audio device
char *rawFile = "/tmp/stereo.wav"; //Raw file to record and playback
int fragmentSize = 256;
int b_continue = 1;
void signalHandler(int sigNum){
log("Signal captured");
b_continue = 0;
}
void configDevice(int fdDsp){
int ossCapabilities = 0;
if(fdDsp == -1)
err("can't open device");
if( ioctl(fdDsp, SNDCTL_DSP_GETCAPS, &ossCapabilities) == -1)
err("unsupported: SNDCTL_DSP_GETCAPS");
/*
* http://www.opensound.com/pguide/audio2.html
*/
if(ossCapabilities & DSP_CAP_TRIGGER != DSP_CAP_TRIGGER){
err("Triggering of recording/playback is not possible with this OSS device.");
}
if(ossCapabilities & DSP_CAP_REALTIME != DSP_CAP_REALTIME){
err("No DSP_CAP_REALTIME.");
}
if(ioctl(fdDsp, SNDCTL_DSP_SETDUPLEX, &ossCapabilities) == -1)
err("can't SNDCTL_DSP_SETDUPLEX");
if(ossCapabilities & DSP_CAP_DUPLEX != DSP_CAP_DUPLEX)
err("can't DSP_CAP_DUPLEX");
int format = AFMT_S16_LE; //set format
if(ioctl(fdDsp, SNDCTL_DSP_SETFMT, &format ) == -1){
err("Error setting format.");
}
int channels = 1; //mono=0 stereo=1
if(ioctl(fdDsp, SNDCTL_DSP_STEREO, &channels ) == -1){
err("Error setting channels." );
}
// FREQUENCY RATE
int speed = 44100;
if(ioctl(fdDsp, SNDCTL_DSP_SPEED, &speed ) == -1){
err("Error setting speed.");
}
// FRAGMENT SIZE
if(ioctl(fdDsp, SNDCTL_DSP_SETBLKSIZE, &fragmentSize) == -1){ //usually 2048 bits
err("Cannot SNDCTL_DSP_SETBLKSIZE.");
}
}
void record(){
int fdDsp = open(device, O_RDONLY);
configDevice(fdDsp);
//create file for writing
const int fdOutput = open(rawFile, O_WRONLY | O_CREAT, S_IWUSR | S_IRUSR);
if(fdOutput ==-1)
err("can't open file to write");
log("Recording...");
do{
// Triggers recording
int enableBits = PCM_ENABLE_INPUT;
if(ioctl(fdDsp, SNDCTL_DSP_SETTRIGGER, &enableBits) == -1)
err("Can't record: SNDCTL_DSP_SETTRIGGER");
// Wrong:
// int *buf[fragmentSize];
// read(fdDsp, buf, fragmentSize);
// write(fdOutput, buf, fragmentSize);
int16_t *buf[fragmentSize/sizeof (int16_t)];
read(fdDsp, buf, fragmentSize/sizeof (int16_t));
write(fdOutput, buf, fragmentSize/sizeof (int16_t));
} while(b_continue == 1);
close(fdOutput);
close(fdDsp);
}
void playback(){
log("Opening file:");
log(rawFile);
log("On device:");
log(device);
int fdDsp = open(device, O_WRONLY);
configDevice(fdDsp);
const int fdInput = open(rawFile, O_RDONLY);
if(fdInput ==-1)
err("can't open file");
log("Playing...");
int eof = 0;
do{
// TRIGGERs PLAYBACK
int enableBits = PCM_ENABLE_OUTPUT;
if(ioctl(fdDsp, SNDCTL_DSP_SETTRIGGER, &enableBits) == -1){
err("Cannot SNDCTL_DSP_SETTRIGGER.");
}
//Wrong buffer type (too large) and wrong last parameter for read():
// int buf[fragmentSize];
// eof = read(fdInput, buf, fragmentSize);
int16_t buf[fragmentSize/sizeof (int16_t)];
eof = read(fdInput, buf, fragmentSize/sizeof (int16_t));
// audio processing:
for(int i=0;i<fragmentSize/sizeof (int16_t);i++){
int16_t l = buf[i];
int16_t r = buf[i+1];
// Using int16_t (short) buffer, gain works but stereo is inverted with factor >= 1.4f
float fl = l;
float fr = r;
fl *= 2.0f;
fr *= 3.0f;
l = fl;
r = fr;
// the output:
(buf)[i] = l;
i++;
(buf)[i] = r;
}
// write(fdDsp, buf, fragmentSize); //wrong
write(fdDsp, buf, fragmentSize/sizeof (int16_t));
if(b_continue == 0) break;
} while(eof > 0);
close(fdInput);
close(fdDsp);
}
int main(int argc, char *argv[])
{
signal(SIGINT, signalHandler);
log("Ctrl^C to stop recording/playback");
record();
b_continue = 1; playback();
log("Stopped.");
return 0;
}
Thanks,
The last parameter of read()/write() is the number of bytes, but an entry in buf[] has more than one byte.
In the two's complement representation of binary numbers, negative values are (or must be) sign extended, i.e., the most significant bits are ones. In this code, neither extracting L/R channels nor combining them works correctly for negative samples.
The easiest way of handling negative samples would be to use one array entry per sample, i.e., short int.
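Building on that, a minimal sketch of the gain step with one int16_t array entry per sample (left and right interleaved). The clamping is an addition of mine: without it, gains above 1.0 can overflow int16_t and wrap around, which is likely what made the channels sound "inverted" at factors >= 1.4f:

#include <stddef.h>
#include <stdint.h>

/* Apply per-channel gain to interleaved 16-bit stereo samples,
 * clamping to the int16_t range so overflow cannot wrap. count is
 * the number of int16_t entries in buf. */
void apply_gain(int16_t *buf, size_t count, float left_gain, float right_gain)
{
    for (size_t i = 0; i + 1 < count; i += 2) {
        float fl = buf[i] * left_gain;
        float fr = buf[i + 1] * right_gain;
        if (fl > 32767.0f) fl = 32767.0f; else if (fl < -32768.0f) fl = -32768.0f;
        if (fr > 32767.0f) fr = 32767.0f; else if (fr < -32768.0f) fr = -32768.0f;
        buf[i] = (int16_t)fl;
        buf[i + 1] = (int16_t)fr;
    }
}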

Output RTSP stream with ffmpeg

I'm attempting to use the ffmpeg libraries to send a video stream from my application to a media server (in this case Wowza). I have been able to do the reverse and consume an RTSP stream, but I'm having a few issues writing an RTSP stream.
I have found a few examples and attempted to utilise the relevant bits. The code is below, simplified as much as I can. I only want to send a single H.264 bit stream to the Wowza server, which it can handle.
I get an "Integer division by zero" exception in the av_interleaved_write_frame function whenever I try to send a packet. The exception looks like it's related to the packet timestamps not being set correctly. I've tried different values and can get past the exception by setting some contrived values, but then the write call fails.
#include <iostream>
#include <fstream>
#include <sstream>
#include <cstring>
#include "stdafx.h"
#include "windows.h"
extern "C"
{
#include <libavcodec\avcodec.h>
#include <libavformat\avformat.h>
#include <libavformat\avio.h>
#include <libswscale\swscale.h>
}
using namespace std;
static int video_is_eof;
#define STREAM_DURATION 50.0
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */
#define VIDEO_CODEC_ID CODEC_ID_H264
static int sws_flags = SWS_BICUBIC;
/* video output */
static AVFrame *frame;
static AVPicture src_picture, dst_picture;
static int frame_count;
static int write_frame(AVFormatContext *fmt_ctx, const AVRational *time_base, AVStream *st, AVPacket *pkt)
{
/* rescale output packet timestamp values from codec to stream timebase */
pkt->pts = av_rescale_q_rnd(pkt->pts, *time_base, st->time_base, AVRounding(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
pkt->dts = av_rescale_q_rnd(pkt->dts, *time_base, st->time_base, AVRounding(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
pkt->duration = av_rescale_q(pkt->duration, *time_base, st->time_base);
pkt->stream_index = st->index;
// Exception occurs here.
return av_interleaved_write_frame(fmt_ctx, pkt);
}
/* Add an output stream. */
static AVStream *add_stream(AVFormatContext *oc, AVCodec **codec, enum AVCodecID codec_id)
{
AVCodecContext *c;
AVStream *st;
/* find the encoder */
*codec = avcodec_find_encoder(codec_id);
if (!(*codec)) {
fprintf(stderr, "Could not find encoder for '%s'\n", avcodec_get_name(codec_id));
exit(1);
}
st = avformat_new_stream(oc, *codec);
if (!st) {
fprintf(stderr, "Could not allocate stream\n");
exit(1);
}
st->id = oc->nb_streams - 1;
c = st->codec;
c->codec_id = codec_id;
c->bit_rate = 400000;
c->width = 352;
c->height = 288;
c->time_base.den = STREAM_FRAME_RATE;
c->time_base.num = 1;
c->gop_size = 12; /* emit one intra frame every twelve frames at most */
c->pix_fmt = STREAM_PIX_FMT;
return st;
}
static void open_video(AVFormatContext *oc, AVCodec *codec, AVStream *st)
{
int ret;
AVCodecContext *c = st->codec;
/* open the codec */
ret = avcodec_open2(c, codec, NULL);
if (ret < 0) {
fprintf(stderr, "Could not open video codec: ");
exit(1);
}
/* allocate and init a re-usable frame */
frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
frame->format = c->pix_fmt;
frame->width = c->width;
frame->height = c->height;
/* Allocate the encoded raw picture. */
ret = avpicture_alloc(&dst_picture, c->pix_fmt, c->width, c->height);
if (ret < 0) {
fprintf(stderr, "Could not allocate picture: ");
exit(1);
}
/* copy data and linesize picture pointers to frame */
*((AVPicture *)frame) = dst_picture;
}
/* Prepare a dummy image. */
static void fill_yuv_image(AVPicture *pict, int frame_index, int width, int height)
{
int x, y, i;
i = frame_index;
/* Y */
for (y = 0; y < height; y++)
for (x = 0; x < width; x++)
pict->data[0][y * pict->linesize[0] + x] = x + y + i * 3;
/* Cb and Cr */
for (y = 0; y < height / 2; y++) {
for (x = 0; x < width / 2; x++) {
pict->data[1][y * pict->linesize[1] + x] = 128 + y + i * 2;
pict->data[2][y * pict->linesize[2] + x] = 64 + x + i * 5;
}
}
}
static void write_video_frame(AVFormatContext *oc, AVStream *st, int flush)
{
int ret;
AVCodecContext *c = st->codec;
if (!flush) {
fill_yuv_image(&dst_picture, frame_count, c->width, c->height);
}
AVPacket pkt = { 0 };
int got_packet;
av_init_packet(&pkt);
/* encode the image */
frame->pts = frame_count;
ret = avcodec_encode_video2(c, &pkt, flush ? NULL : frame, &got_packet);
if (ret < 0) {
fprintf(stderr, "Error encoding video frame:");
exit(1);
}
/* If size is zero, it means the image was buffered. */
if (got_packet) {
ret = write_frame(oc, &c->time_base, st, &pkt);
}
else {
if (flush) {
video_is_eof = 1;
}
ret = 0;
}
if (ret < 0) {
fprintf(stderr, "Error while writing video frame: ");
exit(1);
}
frame_count++;
}
static void close_video(AVFormatContext *oc, AVStream *st)
{
avcodec_close(st->codec);
av_free(src_picture.data[0]);
av_free(dst_picture.data[0]);
av_frame_free(&frame);
}
int _tmain(int argc, _TCHAR* argv[])
{
printf("starting...\n");
const char *filename = "rtsp://test:password@192.168.33.19:1935/ffmpeg/0";
AVOutputFormat *fmt;
AVFormatContext *oc;
AVStream *video_st;
AVCodec *video_codec;
double video_time;
int flush, ret;
/* Initialize libavcodec, and register all codecs and formats. */
av_register_all();
avformat_network_init();
AVOutputFormat* oFmt = av_oformat_next(NULL);
while (oFmt) {
if (oFmt->video_codec == VIDEO_CODEC_ID) {
break;
}
oFmt = av_oformat_next(oFmt);
}
if (!oFmt) {
printf("Could not find the required output format.\n");
exit(1);
}
/* allocate the output media context */
avformat_alloc_output_context2(&oc, oFmt, "rtsp", filename);
if (!oc) {
printf("Could not set the output media context.\n");
exit(1);
}
fmt = oc->oformat;
if (!fmt) {
printf("Could not create the output format.\n");
exit(1);
}
video_st = NULL;
cout << "Codec = " << avcodec_get_name(fmt->video_codec) << endl;
if (fmt->video_codec != AV_CODEC_ID_NONE)
{
video_st = add_stream(oc, &video_codec, fmt->video_codec);
}
/* Now that all the parameters are set, we can open the video codec and allocate the necessary encode buffers. */
if (video_st) {
open_video(oc, video_codec, video_st);
}
av_dump_format(oc, 0, filename, 1);
char errorBuff[80];
if (!(fmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&oc->pb, filename, AVIO_FLAG_WRITE);
if (ret < 0) {
fprintf(stderr, "Could not open outfile '%s': %s", filename, av_make_error_string(errorBuff, 80, ret));
return 1;
}
}
flush = 0;
while (video_st && !video_is_eof) {
/* Compute current video time. */
video_time = (video_st && !video_is_eof) ? video_st->pts.val * av_q2d(video_st->time_base) : INFINITY;
if (!flush && (!video_st || video_time >= STREAM_DURATION)) {
flush = 1;
}
if (video_st && !video_is_eof) {
write_video_frame(oc, video_st, flush);
}
}
if (video_st) {
close_video(oc, video_st);
}
if ((fmt->flags & AVFMT_NOFILE)) {
avio_close(oc->pb);
}
avformat_free_context(oc);
printf("finished.\n");
getchar();
return 0;
}
Does anyone have any insights into how the packet timestamps can be successfully set?
I solved the integer division by zero by building ffmpeg on my Windows instance and debugging the av_interleaved_write_frame call. It turned out the exception was caused by the pts not being set on the video stream object.
Adding the line below to the while loop in the main function fixed the problem:
video_st->pts.val += av_rescale_q(1, video_st->codec->time_base, video_st->time_base);
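For context, a hedged sketch of how that line might sit in the send loop (same variable names as the code above; the exact placement is my assumption):

while (video_st && !video_is_eof) {
    /* Advance the stream pts by one codec-time-base tick per frame,
       so the video_time computed from it keeps increasing. */
    video_st->pts.val += av_rescale_q(1, video_st->codec->time_base, video_st->time_base);
    video_time = video_st->pts.val * av_q2d(video_st->time_base);
    if (!flush && video_time >= STREAM_DURATION) {
        flush = 1;
    }
    write_video_frame(oc, video_st, flush);
}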
Here's a sample that works to get an H.264-encoded dummy stream to a Wowza server via ffmpeg's RTSP pipeline.
// Roughly based on: https://ffmpeg.org/doxygen/trunk/muxing_8c-source.html
#include <chrono>
#include <thread>
#include <tchar.h>
extern "C"
{
#include <libavcodec\avcodec.h>
#include <libavformat\avformat.h>
#include <libavformat\avio.h>
#include <libswscale\swscale.h>
#include <libavutil\time.h>
}
#pragma comment(lib,"libavformat/libavformat.a")
#pragma comment(lib,"libavcodec/libavcodec.a")
#pragma comment(lib,"libavutil/libavutil.a")
#pragma comment(lib,"libswscale/libswscale.a")
#pragma comment(lib,"x264.lib")
#pragma comment(lib,"libswresample/libswresample.a")
using namespace std;
static int video_is_eof;
#define STREAM_DURATION 20
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */ //AV_PIX_FMT_NV12;
#define VIDEO_CODEC_ID CODEC_ID_H264
/* video output */
static AVFrame *frame;
static AVPicture src_picture, dst_picture;
/* Add an output stream. */
static AVStream *add_stream(AVFormatContext *oc, AVCodec **codec, enum AVCodecID codec_id)
{
AVCodecContext *c;
AVStream *st;
/* find the encoder */
*codec = avcodec_find_encoder(codec_id);
if (!(*codec)) {
av_log(NULL, AV_LOG_ERROR, "Could not find encoder for '%s'.\n", avcodec_get_name(codec_id));
}
else {
st = avformat_new_stream(oc, *codec);
if (!st) {
av_log(NULL, AV_LOG_ERROR, "Could not allocate stream.\n");
}
else {
st->id = oc->nb_streams - 1;
st->time_base.den = st->pts.den = 90000;
st->time_base.num = st->pts.num = 1;
c = st->codec;
c->codec_id = codec_id;
c->bit_rate = 400000;
c->width = 352;
c->height = 288;
c->time_base.den = STREAM_FRAME_RATE;
c->time_base.num = 1;
c->gop_size = 12; /* emit one intra frame every twelve frames at most */
c->pix_fmt = STREAM_PIX_FMT;
}
}
return st;
}
static int open_video(AVFormatContext *oc, AVCodec *codec, AVStream *st)
{
int ret;
AVCodecContext *c = st->codec;
/* open the codec */
ret = avcodec_open2(c, codec, NULL);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Could not open video codec.\n", avcodec_get_name(c->codec_id));
}
else {
/* allocate and init a re-usable frame */
frame = av_frame_alloc();
if (!frame) {
av_log(NULL, AV_LOG_ERROR, "Could not allocate video frame.\n");
ret = -1;
}
else {
frame->format = c->pix_fmt;
frame->width = c->width;
frame->height = c->height;
/* Allocate the encoded raw picture. */
ret = avpicture_alloc(&dst_picture, c->pix_fmt, c->width, c->height);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Could not allocate picture.\n");
}
else {
/* copy data and linesize picture pointers to frame */
*((AVPicture *)frame) = dst_picture;
}
}
}
return ret;
}
/* Prepare a dummy image. */
static void fill_yuv_image(AVPicture *pict, int frame_index, int width, int height)
{
int x, y, i;
i = frame_index;
/* Y */
for (y = 0; y < height; y++)
for (x = 0; x < width; x++)
pict->data[0][y * pict->linesize[0] + x] = x + y + i * 3;
/* Cb and Cr */
for (y = 0; y < height / 2; y++) {
for (x = 0; x < width / 2; x++) {
pict->data[1][y * pict->linesize[1] + x] = 128 + y + i * 2;
pict->data[2][y * pict->linesize[2] + x] = 64 + x + i * 5;
}
}
}
static int write_video_frame(AVFormatContext *oc, AVStream *st, int frameCount)
{
int ret = 0;
AVCodecContext *c = st->codec;
fill_yuv_image(&dst_picture, frameCount, c->width, c->height);
AVPacket pkt = { 0 };
int got_packet;
av_init_packet(&pkt);
/* encode the image */
frame->pts = frameCount;
ret = avcodec_encode_video2(c, &pkt, frame, &got_packet);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Error encoding video frame.\n");
}
else {
if (got_packet) {
pkt.stream_index = st->index;
pkt.pts = av_rescale_q_rnd(pkt.pts, c->time_base, st->time_base, AVRounding(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
ret = av_write_frame(oc, &pkt);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Error while writing video frame.\n");
}
}
}
return ret;
}
int _tmain(int argc, _TCHAR* argv[])
{
printf("starting...\n");
const char *url = "rtsp://test:password@192.168.33.19:1935/ffmpeg/0";
//const char *url = "rtsp://192.168.33.19:1935/ffmpeg/0";
AVFormatContext *outContext;
AVStream *video_st;
AVCodec *video_codec;
int ret = 0, frameCount = 0;
av_log_set_level(AV_LOG_DEBUG);
//av_log_set_level(AV_LOG_TRACE);
av_register_all();
avformat_network_init();
avformat_alloc_output_context2(&outContext, NULL, "rtsp", url);
if (!outContext) {
av_log(NULL, AV_LOG_FATAL, "Could not allocate an output context for '%s'.\n", url);
goto end;
}
if (!outContext->oformat) {
av_log(NULL, AV_LOG_FATAL, "Could not create the output format for '%s'.\n", url);
goto end;
}
video_st = add_stream(outContext, &video_codec, VIDEO_CODEC_ID);
/* Now that all the parameters are set, we can open the video codec and allocate the necessary encode buffers. */
if (video_st) {
av_log(NULL, AV_LOG_DEBUG, "Video stream codec %s.\n ", avcodec_get_name(video_st->codec->codec_id));
ret = open_video(outContext, video_codec, video_st);
if (ret < 0) {
av_log(NULL, AV_LOG_FATAL, "Open video stream failed.\n");
goto end;
}
}
else {
av_log(NULL, AV_LOG_FATAL, "Add video stream for the codec '%s' failed.\n", avcodec_get_name(VIDEO_CODEC_ID));
goto end;
}
av_dump_format(outContext, 0, url, 1);
ret = avformat_write_header(outContext, NULL);
if (ret != 0) {
av_log(NULL, AV_LOG_ERROR, "Failed to connect to RTSP server for '%s'.\n", url);
goto end;
}
printf("Press any key to start streaming...\n");
getchar();
auto startSend = std::chrono::system_clock::now();
while (video_st) {
frameCount++;
auto startFrame = std::chrono::system_clock::now();
ret = write_video_frame(outContext, video_st, frameCount);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Write video frame failed.\n", url);
goto end;
}
auto streamDuration = std::chrono::duration_cast<chrono::milliseconds>(std::chrono::system_clock::now() - startSend).count();
printf("Elapsed time %ldms, video stream pts %ld.\n", streamDuration, video_st->pts.val);
if (streamDuration / 1000.0 > STREAM_DURATION) {
break;
}
else {
auto frameDuration = std::chrono::duration_cast<chrono::milliseconds>(std::chrono::system_clock::now() - startFrame).count();
std::this_thread::sleep_for(std::chrono::milliseconds((long)(1000.0 / STREAM_FRAME_RATE - frameDuration)));
}
}
if (video_st) {
avcodec_close(video_st->codec);
av_free(src_picture.data[0]);
av_free(dst_picture.data[0]);
av_frame_free(&frame);
}
avformat_free_context(outContext);
end:
printf("finished.\n");
getchar();
return 0;
}

Using WASAPI api to capture voice input through microphone, but just getting some noise

I am new to WASAPI and am following the MSDN reference code here:
http://msdn.microsoft.com/en-us/library/windows/desktop/dd370800(v=vs.85).aspx to capture audio using the WASAPI APIs.
I modified the MSDN reference code slightly for my purpose. I am using a microphone to record my voice and play it back; this works fine with Sound Recorder and other built-in Windows apps, but with my test application I am not getting any valid sound, just noise.
Here is my code; please let me know where I could be going wrong:
// REFERENCE_TIME time units per second and per millisecond
#define REFTIMES_PER_SEC 10000000
#define REFTIMES_PER_MILLISEC 10000
#define TIME_COUNTER_LIMIT 20
WAVEFORMATEX sinWaveFormat;
CWaveFile sinwave;
HRESULT RecordAudioStream()
{
HRESULT hr;
REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
REFERENCE_TIME hnsActualDuration;
UINT32 bufferFrameCount;
UINT32 numFramesAvailable;
IMMDeviceEnumerator *pEnumerator = NULL;
IMMDevice *pDevice = NULL;
IAudioClient *pAudioClient = NULL;
IAudioCaptureClient *pCaptureClient = NULL;
WAVEFORMATEX *pwfx = NULL;
UINT32 packetLength = 0;
UINT32 time_counter = 0;
BYTE *pData;
DWORD flags;
UINT32 bytesToCapture = 0;
UINT64 u64DevicePosition = 0;
UINT64 u64QPCPosition = 0;
BYTE temp_buffer[10000];
CoInitializeEx(NULL, COINIT_MULTITHREADED);
hr = CoCreateInstance(
__uuidof(MMDeviceEnumerator), NULL,
CLSCTX_ALL, __uuidof(IMMDeviceEnumerator),
(void**)&pEnumerator);
EXIT_ON_ERROR(hr)
hr = pEnumerator->GetDefaultAudioEndpoint(
eCapture, eConsole, &pDevice);
EXIT_ON_ERROR(hr)
hr = pDevice->Activate(
__uuidof(IAudioClient), CLSCTX_ALL,
NULL, (void**)&pAudioClient);
EXIT_ON_ERROR(hr)
hr = pAudioClient->GetMixFormat(&pwfx);
EXIT_ON_ERROR(hr)
// convert from Float to PCM and from WAVEFORMATEXTENSIBLE to WAVEFORMATEX
if ((pwfx->wFormatTag == WAVE_FORMAT_IEEE_FLOAT) ||
((pwfx->wFormatTag == WAVE_FORMAT_EXTENSIBLE) &&
(reinterpret_cast<WAVEFORMATEXTENSIBLE *>(pwfx)->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)))
{
pwfx->wFormatTag = WAVE_FORMAT_PCM;
pwfx->wBitsPerSample = 16;
pwfx->nBlockAlign = pwfx->nChannels * 2; // (nChannels * wBitsPerSample) / 8
pwfx->nAvgBytesPerSec = pwfx->nSamplesPerSec * pwfx->nBlockAlign;
pwfx->cbSize = 0;
}
hr = open_capture_file(pwfx);
EXIT_ON_ERROR(hr)
hr = pAudioClient->Initialize(
AUDCLNT_SHAREMODE_SHARED,
0,
hnsRequestedDuration,
0,
pwfx,
NULL);
EXIT_ON_ERROR(hr)
// Get the size of the allocated buffer.
hr = pAudioClient->GetBufferSize(&bufferFrameCount);
EXIT_ON_ERROR(hr)
hr = pAudioClient->GetService(
__uuidof(IAudioCaptureClient),
(void**)&pCaptureClient);
EXIT_ON_ERROR(hr)
/*
// Notify the audio sink which format to use.
hr = pMySink->SetFormat(pwfx);
EXIT_ON_ERROR(hr)
*/
// Calculate the actual duration of the allocated buffer.
hnsActualDuration = (double)REFTIMES_PER_SEC *
bufferFrameCount / pwfx->nSamplesPerSec;
hr = pAudioClient->Start(); // Start recording.
EXIT_ON_ERROR(hr)
// Sleep for half the buffer duration.
Sleep(hnsActualDuration / REFTIMES_PER_MILLISEC / 2);
hr = pCaptureClient->GetNextPacketSize(&packetLength);
EXIT_ON_ERROR(hr)
bytesToCapture = packetLength * pwfx->nBlockAlign;
while (packetLength != 0 && time_counter <= TIME_COUNTER_LIMIT)
{
time_counter++;
// Get the available data in the shared buffer.
hr = pCaptureClient->GetBuffer(
&pData,
&numFramesAvailable,
&flags, &u64DevicePosition, &u64QPCPosition);
EXIT_ON_ERROR(hr)
if (packetLength != numFramesAvailable)
{
printf("packetlength = %d, numFramesAvailable = %d, does not match.\n", packetLength, numFramesAvailable);
bytesToCapture = numFramesAvailable * pwfx->nBlockAlign;
}
printf("packetlength = %d, numFramesAvailable = %d, bytesToCapture = %d.\n",
packetLength, numFramesAvailable, bytesToCapture);
if (flags & AUDCLNT_BUFFERFLAGS_SILENT)
{
memset(pData, 0, numFramesAvailable * pwfx->nBlockAlign);
}
if (bytesToCapture > sizeof(temp_buffer))
{
printf("bytesToCapture = %d, more than buffer size = %d\n.", bytesToCapture, sizeof(temp_buffer));
continue;
}
memcpy(temp_buffer, pData, bytesToCapture);
hr = pCaptureClient->ReleaseBuffer(numFramesAvailable);
EXIT_ON_ERROR(hr)
// Copy the available capture data to the audio sink.
hr = write_to_file(
temp_buffer, bytesToCapture);
EXIT_ON_ERROR(hr)
// Sleep for half the buffer duration.
//Sleep(hnsActualDuration / REFTIMES_PER_MILLISEC / 2);
hr = pCaptureClient->GetNextPacketSize(&packetLength);
EXIT_ON_ERROR(hr)
bytesToCapture = packetLength * pwfx->nBlockAlign;
}
hr = pAudioClient->Stop(); // Stop recording.
EXIT_ON_ERROR(hr)
Exit:
CoTaskMemFree(pwfx);
SAFE_RELEASE(pEnumerator);
SAFE_RELEASE(pDevice);
SAFE_RELEASE(pAudioClient);
SAFE_RELEASE(pCaptureClient);
return hr;
}
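One thing worth checking: the code rewrites pwfx from float to 16-bit PCM before calling Initialize, but if the engine still delivers 32-bit float frames (or Initialize rejects the altered format and the failure is swallowed), writing those raw bytes under a 16-bit PCM WAV header will sound exactly like noise. A hedged sketch of doing the conversion yourself instead, assuming the mix format really is 32-bit float:

/* Sketch: convert 32-bit float samples to 16-bit PCM before writing
   them to the file. sampleCount = frames * channels. */
static void floatToPcm16(const float *src, short *dst, unsigned int sampleCount)
{
    for (unsigned int i = 0; i < sampleCount; i++) {
        float s = src[i];
        if (s > 1.0f) s = 1.0f;           /* clamp to the nominal [-1, 1] range */
        else if (s < -1.0f) s = -1.0f;
        dst[i] = (short)(s * 32767.0f);   /* scale into the 16-bit range */
    }
}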

Get distance from kinect depth image using ubuntu 12.04 LTS and opencv

I found out from one site that it is possible to find the distance from the Kinect's raw depth video output through the 2 bytes assigned to a particular pixel, as shown in this link - tutorial. Based on this I wrote code to find the distance of the middle point from the Kinect sensor.
I compiled and ran the code on Ubuntu and it shows some values as the distance, coming out around 150 to 1147. I hope it is showing the distance in mm.
But I am not sure if it is right or wrong. I am providing the code below. Is my code working correctly, or do I need to make some changes?
Code:
#include <opencv/cv.h>
#include <opencv/highgui.h>
#include <stdio.h>
#include "libfreenect_cv.h"
int getDist(IplImage *depth){
int x = depth->width/2;
int y = depth->height/2;
printf("width= %d and height %d \n",x,y);
int d = depth->imageData[x*2+y*640*2+1];
printf("1st value is %d \n",d);
d= d << 8;
d= d+depth->imageData[x*2+y*640*2];
return d;
}
IplImage *GlViewColor(IplImage *depth)
{
static IplImage *image = 0;
if (!image) image = cvCreateImage(cvSize(640,480), 8, 3);
unsigned char *depth_mid = (unsigned char*)(image->imageData);
int i;
for (i = 0; i < 640*480; i++) {
int lb = ((short *)depth->imageData)[i] % 256;
int ub = ((short *)depth->imageData)[i] / 256;
switch (ub) {
case 0:
depth_mid[3*i+2] = 255;
depth_mid[3*i+1] = 255-lb;
depth_mid[3*i+0] = 255-lb;
break;
case 1:
depth_mid[3*i+2] = 255;
depth_mid[3*i+1] = lb;
depth_mid[3*i+0] = 0;
break;
case 2:
depth_mid[3*i+2] = 255-lb;
depth_mid[3*i+1] = 255;
depth_mid[3*i+0] = 0;
break;
case 3:
depth_mid[3*i+2] = 0;
depth_mid[3*i+1] = 255;
depth_mid[3*i+0] = lb;
break;
case 4:
depth_mid[3*i+2] = 0;
depth_mid[3*i+1] = 255-lb;
depth_mid[3*i+0] = 255;
break;
case 5:
depth_mid[3*i+2] = 0;
depth_mid[3*i+1] = 0;
depth_mid[3*i+0] = 255-lb;
break;
default:
depth_mid[3*i+2] = 0;
depth_mid[3*i+1] = 0;
depth_mid[3*i+0] = 0;
break;
}
}
return image;
}
int main(int argc, char **argv)
{
while (cvWaitKey(100) != 27) {
IplImage *image = freenect_sync_get_rgb_cv(0);
if (!image) {
printf("Error: Kinect not connected?\n");
return -1;
}
cvCvtColor(image, image, CV_RGB2BGR);
IplImage *depth = freenect_sync_get_depth_cv(0);
if (!depth) {
printf("Error: Kinect not connected?\n");
return -1;
}
cvShowImage("RGB", image);
//int d = getDist(depth);
printf("value is %d \n",getDist(depth));
cvShowImage("Depth", GlViewColor(depth));//GlViewColor(depth)
}
cvDestroyWindow("RGB");
cvDestroyWindow("Depth");
//cvReleaseImage(image);
//cvReleaseImage(depth);
return 0;
}
The code seems to be fine. Scale the image from the range (150-1147) to (0-255) and display it as grayscale; it will give you a better understanding of the image. Doing so makes the nearest objects dark-colored and the farthest light-colored, which is clearer than the GlViewColor function.
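A minimal sketch of that scaling with the old OpenCV C API (the 150-1147 range is taken from the observed values). One more caution: imageData is a signed char pointer, so reading bytes through it sign-extends anything above 127; going through unsigned char avoids that in getDist:

/* Read the middle pixel as one unsigned 16-bit depth value. */
int x = depth->width / 2, y = depth->height / 2;
const unsigned char *p = (const unsigned char *)depth->imageData;
int d = p[(y * depth->width + x) * 2] | (p[(y * depth->width + x) * 2 + 1] << 8);
printf("depth value: %d\n", d);

/* Map the assumed 150..1147 range linearly onto 0..255 and show it as
   grayscale: near objects come out dark, far objects light. */
IplImage *gray = cvCreateImage(cvGetSize(depth), IPL_DEPTH_8U, 1);
double scale = 255.0 / (1147.0 - 150.0);
cvConvertScale(depth, gray, scale, -150.0 * scale);
cvShowImage("DepthGray", gray);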

encoding DirectShow frame buffers by using libavcodec

I am trying to encode a stream of frame buffers grabbed by ISampleGrabber (DirectShow) using libavcodec. After encoding those frames I write them to a file, but on completion the file contains only green frames.
Here is the code for grabbing the frames and encoding them:
void DSGrabberCallback::initFFMpeg(){
const char* filename="G:/test1.mpg";
avcodec_register_all();
printf("Encode video file %s\n", filename);
AVCodecID codec_id=AV_CODEC_ID_MPEG2VIDEO;
codec = avcodec_find_encoder(codec_id);
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate video codec context\n");
}
c->bit_rate = 4000000;
c->width = 320;
c->height = 240;
AVRational test;
test.den=25;
test.num=1;
c->time_base= test;
c->gop_size = 10;
//c->max_b_frames=1;
c->pix_fmt = AV_PIX_FMT_YUV420P;
if(codec_id == AV_CODEC_ID_H264)
av_opt_set(c->priv_data, "preset", "slow", 0);
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
}
f = fopen(filename, "wb");
if (!f) {
fprintf(stderr, "Could not open %s\n", filename);
}
picture = alloc_picture(c->pix_fmt, c->width, c->height);
/*picture->format = c->pix_fmt;
picture->width = c->width;
picture->height = c->height;*/
av_init_packet(&pkt);
}
void DSGrabberCallback::encodeFrame(unsigned char *frame,ULONG size){
std::cout<<"called.....";
pkt.data = NULL;
pkt.size = 0;
picture->data[0]=frame;
fflush(stdout);
picture->pts=counter;
ret = avcodec_encode_video2(c, &pkt, picture, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding frame\n");
}
if (got_output) {
printf("Write frame %3d (size=%5d)\n", counter, pkt.size);
fwrite(pkt.data, 1, pkt.size, f);
av_free_packet(&pkt);
}
}
STDMETHODIMP DSGrabberCallback::SampleCB(double time, IMediaSample* sample)
{
BYTE* data = NULL;
ULONG length = 0;
m_bytes=NULL;
counter=counter+1;
if(FAILED(sample->GetPointer(&data)))
{
return E_FAIL;
}
length = sample->GetActualDataLength();
if(length == 0)
{
return S_OK;
}
if(!m_bytes || m_bytesLength < length)
{
if(m_bytes)
{
delete[] m_bytes;
}
m_bytes = new unsigned char[length];
m_bytesLength = length;
}
if(true)
{
for(size_t row = 0 ; row < 480 ; row++)
{
memcpy((m_bytes + row * 640 * 2), data + (480 - 1 - row) * 640 * 2,
640 * 2);
}
}
std::cout<<"hiiiiiiiiiiiiiiiiiiiiiiii";
// memcpy(m_bytes, data, length);
// std::cout<<"called............... "<<m_bytes<<"\n";
if(counter<500){
encodeFrame(m_bytes,length);
}else{
fwrite(endcode, 1, sizeof(endcode), f);
fclose(f);
avcodec_close(c);
av_free(c);
av_freep(&picture->data[0]);
avcodec_free_frame(&picture);
printf("\n");
exit(1);
}
//rtp.sendRTP(data,length);
//sample->Release();
//printf("Sample received: %p %u\n", data, length);
return S_OK;
}
Can anyone tell me where the problem is?
It is working fine now. I had forgotten to convert the image buffer into YUV420P format. I added some code to scale the buffer into YUV format and everything works now. Thank you Wimmel and Roman R.
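For reference, a hedged sketch of that conversion with libswscale, assuming the grabber delivers 640x480 YUY2 (the row copies of 640 * 2 bytes in SampleCB suggest a 16-bpp packed format):

/* Sketch (requires #include <libswscale/swscale.h>): convert one packed
   YUY2 frame in m_bytes into the YUV420P picture the MPEG-2 encoder
   expects. The context can be created once in initFFMpeg() and reused. */
struct SwsContext *sws_ctx = sws_getContext(
    640, 480, AV_PIX_FMT_YUYV422,              /* source format (assumed) */
    c->width, c->height, AV_PIX_FMT_YUV420P,   /* encoder's format */
    SWS_BILINEAR, NULL, NULL, NULL);

const uint8_t *srcData[1] = { m_bytes };
int srcStride[1] = { 640 * 2 };                /* bytes per packed YUY2 row */
sws_scale(sws_ctx, srcData, srcStride, 0, 480,
          picture->data, picture->linesize);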
