Problems with SDL Audio (No output) - audio

I'm facing some problems with understanding how the SDL audio callback works.
I have this simple code, which should generate a simple square wave:
#include "SDL.h"
#include "SDL_audio.h"
#include <stdlib.h>
#include <math.h>
SDL_Surface *screen;
SDL_AudioSpec spec;
Uint32 sound_len=512;
Uint8 *sound_buffer;
int sound_pos = 0;
int counter;
unsigned int phase_delta=600;
unsigned int phase;
unsigned char out;
//Initialization
void init_sdl (void)
{
if (SDL_Init (SDL_INIT_VIDEO|SDL_INIT_AUDIO) < 0)
exit (-1);
atexit (SDL_Quit);
screen = SDL_SetVideoMode (640, 480, 16, SDL_HWSURFACE);
if (screen == NULL)
exit (-1);
}
//Generates a new sample and outputs it to the audio card
void Callback (void *userdata, Uint8 *stream, int len)
{
Uint8 *waveptr;
//Generates a new sample
phase+=phase_delta;
if ((phase>>8)<127) out=255; else out=0;
//End
//Output the current sample to the audio card
waveptr = sound_buffer;
SDL_MixAudio(stream, waveptr, 1, SDL_MIX_MAXVOLUME);
}
void play (void)
{
sound_buffer = new Uint8[512];
sound_len= 512;
spec.freq = 22050;
spec.format = AUDIO_S16SYS;
spec.channels = 1;
spec.silence = 0;
spec.samples = 512;
spec.padding = 0;
spec.size = 0;
spec.userdata = 0;
spec.callback = Callback;
if (SDL_OpenAudio (&spec, NULL) < 0)
{ //Throw an error
printf ("I don't think you like this: %s\n", SDL_GetError ());
exit (-1);
}
SDL_PauseAudio (0);//Start the audio
}
int main(int argc, char* argv[])
{
init_sdl ();
play ();
SDL_Delay (250);
return 0;
}
I know that the callback is not done right, because I have no idea how to output to the buffer. Each time the callback is called, the first part of the callback function code generates the new sample, and stores it in the variabile Out.
Can anyone here modify this code so that the new samples go from Out to the correct position in the audio buffer?
Also, I don't want to have the code modified in a very super-complex way just to generate the square wave - I have already taken care of that. The wave is generated correctly, each new sample appearing in the variable Out. I just need these samples to be routed correctly to the audio buffer.

You need to cast stream to a actual.format-appropriate datatype and then overwrite the values in stream with len / sizeof( <format's datatype> ) samples.
The square-wave will be kinda hard to hear because the given algorithm will only generate a brief high pulse every ~7.1 million samples (~5 minutes #22050Hz) when phase wraps around.
Try something like this:
#include <SDL.h>
#include <SDL_audio.h>
#include <iostream>
using namespace std;
//Generates new samples and outputs them to the audio card
void Callback( void* userdata, Uint8* stream, int len )
{
// the format of stream depends on actual.format in main()
// we're assuming it's AUDIO_S16SYS
short* samples = reinterpret_cast< short* >( stream );
size_t numSamples = len / sizeof( short );
const unsigned int phase_delta = 600;
static unsigned int phase = 0;
// loop over all our samples
for( size_t i = 0; i < numSamples; ++i )
{
phase+=phase_delta;
short out = 0;
if ((phase>>8)<127) out=SHRT_MAX; else out=0;
samples[i] = out;
}
}
int main( int argc, char* argv[] )
{
if( SDL_Init( SDL_INIT_VIDEO | SDL_INIT_AUDIO ) < 0 )
return -1;
atexit( SDL_Quit );
SDL_Surface* screen = SDL_SetVideoMode( 640, 480, 16, SDL_ANYFORMAT );
if( screen == NULL)
return -1;
SDL_AudioSpec spec;
spec.freq = 22050;
spec.format = AUDIO_S16SYS;
spec.channels = 1;
spec.samples = 4096;
spec.callback = Callback;
spec.userdata = NULL;
SDL_AudioSpec actual;
if( SDL_OpenAudio( &spec, &actual ) < 0 )
{
cerr << "I don't think you like this: " << SDL_GetError() << endl;
return -1;
}
if( spec.format != actual.format )
{
cerr << "format mismatch!" << endl;
return -1;
}
SDL_PauseAudio( 0 );
SDL_Event ev;
while( SDL_WaitEvent( &ev ) )
{
if( ev.type == SDL_QUIT )
break;
}
SDL_CloseAudio();
SDL_Quit();
return 0;
}

Related

Audio Recording and Playback in C : problem with audio gain

The question essentially is how to correctly apply gain to an audio sample?
I'm programming on FreeBSD and OSS, but manipulate volume in audio sample is probably the same for other OS and applications.
I'm studying others' applications internals like ecasound (in C++) and SoX (in C) but I don't know whats wrong when I read a sample and apply gain to it : it becomes distorted and noisy. My point is to understand why it is not working to turn the volume down (gain lesser than 1).
I'm working with stereo 16 bit LE samples. Without applying gain, it works perfectly (recording and playback).
I thought that I should convert an integer sample to float; multiply by a gain factor and restore it to integer. But it is not working. And it seems to be the exact same approach for SoX in src/vol.c in function static int flow.
Below is my code (no additional libs used). The function playback is where I'm applying gain.
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include "/usr/include/sys/soundcard.h"
#include <sys/ioctl.h>
#include <sys/time.h>
#include <sys/stat.h> //man 2 chmod
#include <signal.h>
#define DEBUG 1
#define log(msg) if (DEBUG) printf("[LOG] %s\n",msg)
#define err(msg) {printf("[ERR] %s\n",msg); exit(1); }
const char *device = "/dev/dsp3.1"; //Audio device
char *rawFile = "/tmp/raw-file.wav"; //Raw file to record and playback
int fragmentSize = 256;
int b_continue = 1;
void signalHandler(int sigNum){
log("Signal captured");
b_continue = 0;
}
void configDevice(int fdDsp){
int ossCapabilities = 0;
if(fdDsp == -1)
err("can't open device");
if( ioctl(fdDsp, SNDCTL_DSP_GETCAPS, &ossCapabilities) == -1)
err("unsupported: SNDCTL_DSP_GETCAPS");
/*
* http://www.opensound.com/pguide/audio2.html
*/
if(ossCapabilities & DSP_CAP_TRIGGER != DSP_CAP_TRIGGER){
err("Triggering of recording/playback is not possible with this OSS device.");
}
if(ossCapabilities & DSP_CAP_REALTIME != DSP_CAP_REALTIME){
err("No DSP_CAP_REALTIME.");
}
if(ioctl(fdDsp, SNDCTL_DSP_SETDUPLEX, &ossCapabilities) == -1)
err("can't SNDCTL_DSP_SETDUPLEX");
if(ossCapabilities & DSP_CAP_DUPLEX != DSP_CAP_DUPLEX)
err("can't DSP_CAP_DUPLEX");
int format = AFMT_S16_LE; //set format
if(ioctl(fdDsp, SNDCTL_DSP_SETFMT, &format ) == -1){
err("Error setting format.");
}
int channels = 1; //mono=0 stereo=1
if(ioctl(fdDsp, SNDCTL_DSP_STEREO, &channels ) == -1){
err("Error setting channels." );
}
// FREQUENCY RATE
int speed = 44100;
if(ioctl(fdDsp, SNDCTL_DSP_SPEED, &speed ) == -1){
err("Error setting speed.");
}
// FRAGMENT SIZE
if(ioctl(fdDsp, SNDCTL_DSP_SETBLKSIZE, &fragmentSize) == -1){ //normalmente 2048 bits
err("Cannot SNDCTL_DSP_SETBLKSIZE.");
}
}
void record(){
int fdDsp = open(device, O_RDONLY);
configDevice(fdDsp);
//create file for writing
const int fdOutput = open(rawFile, O_WRONLY | O_CREAT, S_IWUSR | S_IRUSR);
if(fdOutput ==-1)
err("can't open file to write");
log("Recording...");
do{
// Triggers recording
int enableBits = PCM_ENABLE_INPUT;
if(ioctl(fdDsp, SNDCTL_DSP_SETTRIGGER, &enableBits) == -1)
err("Can't record: SNDCTL_DSP_SETTRIGGER");
int *buf[fragmentSize];
read(fdDsp, buf, fragmentSize);
write(fdOutput, buf, fragmentSize);
} while(b_continue == 1);
close(fdOutput);
close(fdDsp);
}
void playback(){
log("Opening file:");
log(rawFile);
log("On device:");
log(device);
int fdDsp = open(device, O_WRONLY);
configDevice(fdDsp);
const int fdInput = open(rawFile, O_RDONLY);
if(fdInput ==-1)
err("can't open file");
log("Playing...");
int eof = 0;
do{
// TRIGGERs PLAYBACK
int enableBits = PCM_ENABLE_OUTPUT;
if(ioctl(fdDsp, SNDCTL_DSP_SETTRIGGER, &enableBits) == -1){
err("Cannot SNDCTL_DSP_SETTRIGGER.");
}
int buf[fragmentSize];
eof = read(fdInput, buf, fragmentSize); //bytes read or -1 if EOF
// audio processing:
for(int i=0;i<fragmentSize;i++){
// learning how to get left and right channels from buffer
int l = (buf)[i] & 0xffff;
int r = ((buf)[i] >> 16) & 0xffff ;
// FIXME: it is causing distortion:
float fl = l;
float fr = r;
fl *= 1.0;
fr *= 0.3; //if different than 1, sounds distorted and noisy
l = fl;
r = fr;
// OK: unite Left and Right channels again
int lr = (l ) | (r << 16);
// OK: other options to mix these two channels:
int lleft = l; //Just the left channel
int rright = (r << 16); //Just the right channel
int lmono = (l << 16) | l; //Left ch. on both channels
int rmono = (r << 16) | r; //Right ch. on both channels
// the output:
(buf)[i] = lr;
}
write(fdDsp, buf, fragmentSize);
if(b_continue == 0) break;
} while(eof > 0);
close(fdInput);
close(fdDsp);
}
int main(int argc, char *argv[])
{
signal(SIGINT, signalHandler);
log("Ctrl^C to stop recording/playback");
record();
b_continue = 1; playback();
log("Stopped.");
return 0;
}
UPDATE:
As pointed out by CL, I was using the wrong type and the last parameter of read()/write() is greater than the size of the buffer.
So, in FreeBSD I changed the buffer type to int16_t (short) defined in #include <stdint.h> .
Now I can correctly apply a gain as desired:
float fl = l;
float fr = r;
fl *= 1.0f;
fr *= 1.5f;
l = fl;
r = fr;
I'll accept CL's answer.
Now the audio processing loop is working with one sample per time (left and right interleaved).
Updated code:
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include "/usr/include/sys/soundcard.h"
#include <sys/ioctl.h>
#include <sys/time.h>
#include <sys/stat.h> //man 2 chmod
#include <signal.h>
#include <stdint.h> //has type int16_t (short)
#define DEBUG 1
#define log(msg) if (DEBUG) printf("[LOG] %s\n",msg)
#define err(msg) {printf("[ERR] %s\n",msg); exit(1); }
const char *device = "/dev/dsp3.1"; //Audio device
char *rawFile = "/tmp/stereo.wav"; //Raw file to record and playback
int fragmentSize = 256;
int b_continue = 1;
void signalHandler(int sigNum){
log("Signal captured");
b_continue = 0;
}
void configDevice(int fdDsp){
int ossCapabilities = 0;
if(fdDsp == -1)
err("can't open device");
if( ioctl(fdDsp, SNDCTL_DSP_GETCAPS, &ossCapabilities) == -1)
err("unsupported: SNDCTL_DSP_GETCAPS");
/*
* http://www.opensound.com/pguide/audio2.html
*/
if(ossCapabilities & DSP_CAP_TRIGGER != DSP_CAP_TRIGGER){
err("Triggering of recording/playback is not possible with this OSS device.");
}
if(ossCapabilities & DSP_CAP_REALTIME != DSP_CAP_REALTIME){
err("No DSP_CAP_REALTIME.");
}
if(ioctl(fdDsp, SNDCTL_DSP_SETDUPLEX, &ossCapabilities) == -1)
err("can't SNDCTL_DSP_SETDUPLEX");
if(ossCapabilities & DSP_CAP_DUPLEX != DSP_CAP_DUPLEX)
err("can't DSP_CAP_DUPLEX");
int format = AFMT_S16_LE; //set format
if(ioctl(fdDsp, SNDCTL_DSP_SETFMT, &format ) == -1){
err("Error setting format.");
}
int channels = 1; //mono=0 stereo=1
if(ioctl(fdDsp, SNDCTL_DSP_STEREO, &channels ) == -1){
err("Error setting channels." );
}
// FREQUENCY RATE
int speed = 44100;
if(ioctl(fdDsp, SNDCTL_DSP_SPEED, &speed ) == -1){
err("Error setting speed.");
}
// FRAGMENT SIZE
if(ioctl(fdDsp, SNDCTL_DSP_SETBLKSIZE, &fragmentSize) == -1){ //normalmente 2048 bits
err("Cannot SNDCTL_DSP_SETBLKSIZE.");
}
}
void record(){
int fdDsp = open(device, O_RDONLY);
configDevice(fdDsp);
//create file for writing
const int fdOutput = open(rawFile, O_WRONLY | O_CREAT, S_IWUSR | S_IRUSR);
if(fdOutput ==-1)
err("can't open file to write");
log("Recording...");
do{
// Triggers recording
int enableBits = PCM_ENABLE_INPUT;
if(ioctl(fdDsp, SNDCTL_DSP_SETTRIGGER, &enableBits) == -1)
err("Can't record: SNDCTL_DSP_SETTRIGGER");
// Wrong:
// int *buf[fragmentSize];
// read(fdDsp, buf, fragmentSize);
// write(fdOutput, buf, fragmentSize);
int16_t *buf[fragmentSize/sizeof (int16_t)];
read(fdDsp, buf, fragmentSize/sizeof (int16_t));
write(fdOutput, buf, fragmentSize/sizeof (int16_t));
} while(b_continue == 1);
close(fdOutput);
close(fdDsp);
}
void playback(){
log("Opening file:");
log(rawFile);
log("On device:");
log(device);
int fdDsp = open(device, O_WRONLY);
configDevice(fdDsp);
const int fdInput = open(rawFile, O_RDONLY);
if(fdInput ==-1)
err("can't open file");
log("Playing...");
int eof = 0;
do{
// TRIGGERs PLAYBACK
int enableBits = PCM_ENABLE_OUTPUT;
if(ioctl(fdDsp, SNDCTL_DSP_SETTRIGGER, &enableBits) == -1){
err("Cannot SNDCTL_DSP_SETTRIGGER.");
}
//Wrong buffer type (too large) and wrong last parameter for read():
// int buf[fragmentSize];
// eof = read(fdInput, buf, fragmentSize);
int16_t buf[fragmentSize/sizeof (int16_t)];
eof = read(fdInput, buf, fragmentSize/sizeof (int16_t));
// audio processing:
for(int i=0;i<fragmentSize/sizeof (int16_t);i++){
int16_t l = buf[i];
int16_t r = buf[i+1];
// Using int16_t (short) buffer, gain works but stereo is inverted with factor >= 1.4f
float fl = l;
float fr = r;
fl *= 2.0f;
fr *= 3.0f;
l = fl;
r = fr;
// the output:
(buf)[i] = l;
i++;
(buf)[i] = r;
}
// write(fdDsp, buf, fragmentSize); //wrong
write(fdDsp, buf, fragmentSize/sizeof (int16_t));
if(b_continue == 0) break;
} while(eof > 0);
close(fdInput);
close(fdDsp);
}
int main(int argc, char *argv[])
{
signal(SIGINT, signalHandler);
log("Ctrl^C to stop recording/playback");
record();
b_continue = 1; playback();
log("Stopped.");
return 0;
}
Thanks,
The last parameter of read()/write() is the number of bytes, but an entry in buf[] has more than one byte.
In the two's complement representation of binary numbers, negative values are (or must be) sign extended, i.e., the most significant bits are ones. In this code, neither extracting L/R channels nor combining them works correctly for negative samples.
The easiest way of handling negative samples would be to use one array entry per sample, i.e., short int.

WCHAR* not compatible with "LPSTR" parameter. In wsprintf C++

Can't compile the following code in C++ with Visual Studio 2017 (Got it from: Source Code):
#include <Wincodec.h> // we use WIC for saving images
#include <d3d9.h> // DirectX 9 header
#pragma comment(lib, "d3d9.lib") // link to DirectX 9 library
#include <stdio.h>
#include <Windows.h>
#include <wchar.h>
#define WIDEN2(x) L ## x
#define WIDEN(x) WIDEN2(x)
#define __WFILE__ WIDEN(__FILE__)
#define HRCHECK(__expr) {hr=(__expr);if(FAILED(hr)){wprintf(L"FAILURE 0x%08X (%i)\n\tline: %u file: '%s'\n\texpr: '" WIDEN(#__expr) L"'\n",hr, hr, __LINE__,__WFILE__);goto cleanup;}}
#define RELEASE(__p) {if(__p!=nullptr){__p->Release();__p=nullptr;}}
HRESULT SavePixelsToFile32bppPBGRA(UINT width, UINT height, UINT stride, LPBYTE pixels, LPWSTR filePath, const GUID &format)
{
if (!filePath || !pixels)
return E_INVALIDARG;
HRESULT hr = S_OK;
IWICImagingFactory *factory = nullptr;
IWICBitmapEncoder *encoder = nullptr;
IWICBitmapFrameEncode *frame = nullptr;
IWICStream *stream = nullptr;
GUID pf = GUID_WICPixelFormat32bppPBGRA;
BOOL coInit = CoInitialize(nullptr);
HRCHECK(CoCreateInstance(CLSID_WICImagingFactory, nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&factory)));
HRCHECK(factory->CreateStream(&stream));
HRCHECK(stream->InitializeFromFilename(filePath, GENERIC_WRITE));
HRCHECK(factory->CreateEncoder(format, nullptr, &encoder));
HRCHECK(encoder->Initialize(stream, WICBitmapEncoderNoCache));
HRCHECK(encoder->CreateNewFrame(&frame, nullptr)); // we don't use options here
HRCHECK(frame->Initialize(nullptr)); // we dont' use any options here
HRCHECK(frame->SetSize(width, height));
HRCHECK(frame->SetPixelFormat(&pf));
HRCHECK(frame->WritePixels(height, stride, stride * height, pixels));
HRCHECK(frame->Commit());
HRCHECK(encoder->Commit());
cleanup:
RELEASE(stream);
RELEASE(frame);
RELEASE(encoder);
RELEASE(factory);
if (coInit) CoUninitialize();
return hr;
}
HRESULT Direct3D9TakeScreenshots(UINT adapter, UINT count)
{
HRESULT hr = S_OK;
IDirect3D9 *d3d = nullptr;
IDirect3DDevice9 *device = nullptr;
IDirect3DSurface9 *surface = nullptr;
D3DPRESENT_PARAMETERS parameters = { 0 };
D3DDISPLAYMODE mode;
D3DLOCKED_RECT rc;
UINT pitch;
SYSTEMTIME st;
LPBYTE *shots = nullptr;
// init D3D and get screen size
d3d = Direct3DCreate9(D3D_SDK_VERSION);
HRCHECK(d3d->GetAdapterDisplayMode(adapter, &mode));
parameters.Windowed = TRUE;
parameters.BackBufferCount = 1;
parameters.BackBufferHeight = mode.Height;
parameters.BackBufferWidth = mode.Width;
parameters.SwapEffect = D3DSWAPEFFECT_DISCARD;
parameters.hDeviceWindow = NULL;
// create device & capture surface
HRCHECK(d3d->CreateDevice(adapter, D3DDEVTYPE_HAL, NULL, D3DCREATE_SOFTWARE_VERTEXPROCESSING, &parameters, &device));
HRCHECK(device->CreateOffscreenPlainSurface(mode.Width, mode.Height, D3DFMT_A8R8G8B8, D3DPOOL_SYSTEMMEM, &surface, nullptr));
// compute the required buffer size
HRCHECK(surface->LockRect(&rc, NULL, 0));
pitch = rc.Pitch;
HRCHECK(surface->UnlockRect());
// allocate screenshots buffers
shots = new LPBYTE[count];
for (UINT i = 0; i < count; i++)
{
shots[i] = new BYTE[pitch * mode.Height];
}
GetSystemTime(&st); // measure the time we spend doing <count> captures
wprintf(L"%i:%i:%i.%i\n", st.wHour, st.wMinute, st.wSecond, st.wMilliseconds);
for (UINT i = 0; i < count; i++)
{
// get the data
HRCHECK(device->GetFrontBufferData(0, surface));
// copy it into our buffers
HRCHECK(surface->LockRect(&rc, NULL, 0));
CopyMemory(shots[i], rc.pBits, rc.Pitch * mode.Height);
HRCHECK(surface->UnlockRect());
}
GetSystemTime(&st);
wprintf(L"%i:%i:%i.%i\n", st.wHour, st.wMinute, st.wSecond, st.wMilliseconds);
// save all screenshots
for (UINT i = 0; i < count; i++)
{
WCHAR file[100];
wsprintf(file, L"cap%i.png", i);
HRCHECK(SavePixelsToFile32bppPBGRA(mode.Width, mode.Height, pitch, shots[i], file, GUID_ContainerFormatPng));
}
cleanup:
if (shots != nullptr)
{
for (UINT i = 0; i < count; i++)
{
delete shots[i];
}
delete[] shots;
}
RELEASE(surface);
RELEASE(device);
RELEASE(d3d);
return hr;
}
int _tmain(int argc, _TCHAR* argv[])
{
HRESULT hr = Direct3D9TakeScreenshots(D3DADAPTER_DEFAULT, 10);
return 0;
}
Gives error at:
// save all screenshots
for (UINT i = 0; i < count; i++)
{
WCHAR file[100];
wsprintf(file, L"cap%i.png", i);
HRCHECK(SavePixelsToFile32bppPBGRA(mode.Width, mode.Height, pitch, shots[i], file, GUID_ContainerFormatPng));
}
Where:
"WCHAR file[100]" is not compatible with "LSPTR" on wsprintf.
L"cap%i.png" (const wchar_t) is not compatible with LPCSTR on wsprintf.
And in:
int _tmain(int argc, _TCHAR* argv[])
{
HRESULT hr = Direct3D9TakeScreenshots(D3DADAPTER_DEFAULT, 10);
return 0;
}
Where _TCHAR is not defined.
¡I'm new to C++ so please give me some hints!

How to write to multiple files on different disks simultaneously in one thread with DMA?

I use aio to write multiple files on different disk in one thread. When I use buffered writing, IO processing is concurrent. But cpu loads is very high. When I open files with DIRECT flag, IO processing isn't concurrent.
How to write to multiple files on different disks simultaneously in one thread with DMA?
#include <malloc.h>
#include <stdio.h>
#include <string.h>
#include <iostream>
#include <sstream>
#include <inttypes.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/syscall.h>
#include <linux/aio_abi.h>
using namespace std;
long double timeDiff(timespec start, timespec end) {
const long double s = start.tv_sec + start.tv_nsec * 1.0e-9;
const long double e = end.tv_sec + end.tv_nsec * 1.0e-9;
return e - s;
}
// nr: maximum number of requests that can simultaneously reside in the context.
inline int io_setup(unsigned nr, aio_context_t *ctxp) {
return syscall(__NR_io_setup, nr, ctxp);
}
inline int io_destroy(aio_context_t ctx) {
return syscall(__NR_io_destroy, ctx);
}
// Every I/O request that is submitted to
inline int io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp) {
return syscall(__NR_io_submit, ctx, nr, iocbpp);
}
// For every completed I/O request kernel creates an io_event structure.
// minimal number of events one wants to get.
// maximum number of events one wants to get.
inline int io_getevents(aio_context_t ctx, long min_nr, long max_nr,
struct io_event *events, struct timespec *timeout) {
return syscall(__NR_io_getevents, ctx, min_nr, max_nr, events, timeout);
}
int main(int argc, char *argv[]) {
// prepare data
const unsigned int kAlignment = 4096;
const long data_size = 1600 * 1024 * 12 / 8;
//const long data_size = 2448 * 1344 * 12 / 8;
void * data = memalign(kAlignment, data_size);
memset(data, 0, data_size);
//for (int i = 0; i < data_size; ++i)
// data[i] = 'A';
// prepare fd
//const int file_num = 3;
const int file_num = 2;
int fd_arr[file_num];
for (int i = 0; i < file_num; ++i) {
ostringstream filename;
if (i == 0) {
//filename << "/data/test";
filename << "/test";
} else {
filename << "/data" << i << "/test";
}
//filename << "/data/test" << i;
int fd = open(filename.str().c_str(), O_WRONLY | O_NONBLOCK | O_CREAT | O_DIRECT | O_APPEND, 0644);
//int fd = open(filename.str().c_str(), O_WRONLY | O_NONBLOCK | O_CREAT | O_DIRECT, 0644);
//int fd = open(filename.str().c_str(), O_WRONLY | O_NONBLOCK | O_CREAT, 0644);
if (fd < 0) {
perror("open");
return -1;
}
fd_arr[i] = fd;
}
aio_context_t ctx;
struct io_event events[file_num];
int ret;
ctx = 0;
ret = io_setup(1000, &ctx);
if (ret < 0) {
perror("io_setup");
return -1;
}
struct iocb cbs[file_num];
for (int i = 0; i < file_num; ++i) {
memset(&cbs[i], 0, sizeof(cbs[i]));
}
struct iocb * cbs_pointer[file_num];
for (int i = 0; i < file_num; ++i) {
/* setup I/O control block */
cbs_pointer[i] = &cbs[i];
cbs[i].aio_fildes = fd_arr[i];
cbs[i].aio_lio_opcode = IOCB_CMD_PWRITE; // IOCV_CMD
cbs[i].aio_nbytes = data_size;
}
timespec tStart, tCurr;
clock_gettime(CLOCK_REALTIME, &tStart);
const int frame_num = 10000;
for (int k = 0; k < frame_num; ++k) {
for (int i = 0; i < file_num; ++i) {
/* setup I/O control block */
cbs[i].aio_buf = (uint64_t)data;
//cbs[i].aio_offset = k * data_size;
}
ret = io_submit(ctx, file_num, cbs_pointer);
if (ret < 0) {
perror("io_submit");
return -1;
}
/* get reply */
ret = io_getevents(ctx, file_num, file_num, events, NULL);
//printf("events: %d, k: %d\n", ret, k);
}
clock_gettime(CLOCK_REALTIME, &tCurr);
cout << "frame: " << frame_num << " time: " << timeDiff(tStart, tCurr) << endl;
ret = io_destroy(ctx);
if (ret < 0) {
perror("io_destroy");
return -1;
}
// close fd
for (int i = 0; i < file_num; ++i) {
fsync(fd_arr[i]);
close(fd_arr[i]);
}
return 0;
}
Linux can make writes actually async if and only if the physical extents being written are allocated on the disc already. Otherwise it has to take a mutex and do the allocation first, thus everything becomes synchronous.
Note that truncating the file to a new length usually doesn't actually allocate the underlying extents. You need to prewrite the contents first. Thereafter, rewriting the same extents will now be done async and thus become concurrent.
As you might be gathering, async file i/o on Linux is not great, though it keeps on getting better over time. Windows or FreeBSD have far superior implementations. Even OS X is not terrible. Use any of those instead.

Zero copy in using vmsplice/splice in Linux

I am trying to get zero copy semantics working in linux using
vmsplice()/splice() but I don't see any performance improvement. This
is on linux 3.10, tried on 3.0.0 and 2.6.32. The following code tries
to do file writes, I have tried network socket writes() also, couldn't
see any improvement.
Can somebody tell what am I doing wrong ?
Has anyone gotten improvement using vmsplice()/splice() in production ?
#include <assert.h>
#include <fcntl.h>
#include <iostream>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <unistd.h>
#include <vector>
const char *filename = "Test-File";
const int block_size = 4 * 1024;
const int file_size = 4 * 1024 * 1024;
using namespace std;
int pipes[2];
vector<char *> file_data;
static int NowUsecs() {
struct timeval tv;
const int err = gettimeofday(&tv, NULL);
assert(err >= 0);
return tv.tv_sec * 1000000LL + tv.tv_usec;
}
void CreateData() {
for (int xx = 0; xx < file_size / block_size; ++xx) {
// The data buffer to fill.
char *data = NULL;
assert(posix_memalign(reinterpret_cast<void **>(&data), 4096, block_size) == 0);
file_data.emplace_back(data);
}
}
int SpliceWrite(int fd, char *buf, int buf_len) {
int len = buf_len;
struct iovec iov;
iov.iov_base = buf;
iov.iov_len = len;
while (len) {
int ret = vmsplice(pipes[1], &iov, 1, SPLICE_F_GIFT);
assert(ret >= 0);
if (!ret)
break;
len -= ret;
if (len) {
auto ptr = static_cast<char *>(iov.iov_base);
ptr += ret;
iov.iov_base = ptr;
iov.iov_len -= ret;
}
}
len = buf_len;
while (len) {
int ret = splice(pipes[0], NULL, fd, NULL, len, SPLICE_F_MOVE);
assert(ret >= 0);
if (!ret)
break;
len -= ret;
}
return 1;
}
int WriteToFile(const char *filename, bool use_splice) {
// Open and write to the file.
mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
int fd = open(filename, O_CREAT | O_RDWR, mode);
assert(fd >= 0);
const int start = NowUsecs();
for (int xx = 0; xx < file_size / block_size; ++xx) {
if (use_splice) {
SpliceWrite(fd, file_data[xx], block_size);
} else {
assert(write(fd, file_data[xx], block_size) == block_size);
}
}
const int time = NowUsecs() - start;
// Close file.
assert(close(fd) == 0);
return time;
}
void ValidateData() {
// Open and read from file.
const int fd = open(filename, O_RDWR);
assert(fd >= 0);
char *read_buf = (char *)malloc(block_size);
for (int xx = 0; xx < file_size / block_size; ++xx) {
assert(read(fd, read_buf, block_size) == block_size);
assert(memcmp(read_buf, file_data[xx], block_size) == 0);
}
// Close file.
assert(close(fd) == 0);
assert(unlink(filename) == 0);
}
int main(int argc, char **argv) {
auto res = pipe(pipes);
assert(res == 0);
CreateData();
const int without_splice = WriteToFile(filename, false /* use splice */);
ValidateData();
const int with_splice = WriteToFile(filename, true /* use splice */);
ValidateData();
cout << "TIME WITH SPLICE: " << with_splice << endl;
cout << "TIME WITHOUT SPLICE: " << without_splice << endl;
return 0;
}
I did a proof-of-concept some years ago where I got as 4x speedup using an optimized, specially tailored, vmsplice() code. This was measured against a generic socket/write() based solution. This blog post from natsys-lab echoes my findings. But I believe you need to have the exact right use case to get near this number.
So what are you doing wrong? Primarily I think you are measuring the wrong thing. When writing directly to a file you have 1 system call, which is write(). And you are not actually copying data (except to the kernel). When you have a buffer with data that you want to write to disk, it's not gonna get faster than that.
In you vmsplice/splice setup you are still copying you data into the kernel, but you have a total of 2 system calls vmsplice()+splice() to get it to disk. The speed being identical to write() is probably just a testament to Linux system call speed :-)
A more "fair" setup would be to write one program that read() from stdin and write() the same data to stdout. Write an identical program that simply splice() stdin into a file (or point stdout to a file when you run it). Although this setup might be too simple to really show anything.
Aside: an (undocumented?) feature of vmsplice() is that you can also use to to read data from a pipe. I used this in my old POC. It was basically just an IPC layer based on the idea of passing memory pages around using vmsplice().
Note: NowUsecs() probably overflows the int

Output RTSP stream with ffmpeg

I'm attempting to use the ffmpeg libraries to send a video stream from my application to a media server (in this case wowza). I have been able to do the reverse and consume an RTSP stream but I'm having a few issues writing an RTSP stream.
I have found a few examples and attempted to utilise the relevant bits. The code is below. I have simplified it as much as I can. I do only want to send a single H264 bit stream to the wowza server and which it can handle.
I get an "Integer division by zero" exception whenever in the av_interleaved_write_frame function when I try and send a packet. The exception looks like it's related to the packet timestamps not being set correctly. I've tried different values and can get past the exception by setting some contrived values but then the write call fails.
#include <iostream>
#include <fstream>
#include <sstream>
#include <cstring>
#include "stdafx.h"
#include "windows.h"
extern "C"
{
#include <libavcodec\avcodec.h>
#include <libavformat\avformat.h>
#include <libavformat\avio.h>
#include <libswscale\swscale.h>
}
using namespace std;
static int video_is_eof;
#define STREAM_DURATION 50.0
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */
#define VIDEO_CODEC_ID CODEC_ID_H264
static int sws_flags = SWS_BICUBIC;
/* video output */
static AVFrame *frame;
static AVPicture src_picture, dst_picture;
static int frame_count;
static int write_frame(AVFormatContext *fmt_ctx, const AVRational *time_base, AVStream *st, AVPacket *pkt)
{
/* rescale output packet timestamp values from codec to stream timebase */
pkt->pts = av_rescale_q_rnd(pkt->pts, *time_base, st->time_base, AVRounding(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
pkt->dts = av_rescale_q_rnd(pkt->dts, *time_base, st->time_base, AVRounding(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
pkt->duration = av_rescale_q(pkt->duration, *time_base, st->time_base);
pkt->stream_index = st->index;
// Exception occurs here.
return av_interleaved_write_frame(fmt_ctx, pkt);
}
/* Add an output stream. */
static AVStream *add_stream(AVFormatContext *oc, AVCodec **codec, enum AVCodecID codec_id)
{
AVCodecContext *c;
AVStream *st;
/* find the encoder */
*codec = avcodec_find_encoder(codec_id);
if (!(*codec)) {
fprintf(stderr, "Could not find encoder for '%s'\n", avcodec_get_name(codec_id));
exit(1);
}
st = avformat_new_stream(oc, *codec);
if (!st) {
fprintf(stderr, "Could not allocate stream\n");
exit(1);
}
st->id = oc->nb_streams - 1;
c = st->codec;
c->codec_id = codec_id;
c->bit_rate = 400000;
c->width = 352;
c->height = 288;
c->time_base.den = STREAM_FRAME_RATE;
c->time_base.num = 1;
c->gop_size = 12; /* emit one intra frame every twelve frames at most */
c->pix_fmt = STREAM_PIX_FMT;
return st;
}
static void open_video(AVFormatContext *oc, AVCodec *codec, AVStream *st)
{
int ret;
AVCodecContext *c = st->codec;
/* open the codec */
ret = avcodec_open2(c, codec, NULL);
if (ret < 0) {
fprintf(stderr, "Could not open video codec: ");
exit(1);
}
/* allocate and init a re-usable frame */
frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
frame->format = c->pix_fmt;
frame->width = c->width;
frame->height = c->height;
/* Allocate the encoded raw picture. */
ret = avpicture_alloc(&dst_picture, c->pix_fmt, c->width, c->height);
if (ret < 0) {
fprintf(stderr, "Could not allocate picture: ");
exit(1);
}
/* copy data and linesize picture pointers to frame */
*((AVPicture *)frame) = dst_picture;
}
/* Prepare a dummy image. */
static void fill_yuv_image(AVPicture *pict, int frame_index, int width, int height)
{
int x, y, i;
i = frame_index;
/* Y */
for (y = 0; y < height; y++)
for (x = 0; x < width; x++)
pict->data[0][y * pict->linesize[0] + x] = x + y + i * 3;
/* Cb and Cr */
for (y = 0; y < height / 2; y++) {
for (x = 0; x < width / 2; x++) {
pict->data[1][y * pict->linesize[1] + x] = 128 + y + i * 2;
pict->data[2][y * pict->linesize[2] + x] = 64 + x + i * 5;
}
}
}
static void write_video_frame(AVFormatContext *oc, AVStream *st, int flush)
{
int ret;
AVCodecContext *c = st->codec;
if (!flush) {
fill_yuv_image(&dst_picture, frame_count, c->width, c->height);
}
AVPacket pkt = { 0 };
int got_packet;
av_init_packet(&pkt);
/* encode the image */
frame->pts = frame_count;
ret = avcodec_encode_video2(c, &pkt, flush ? NULL : frame, &got_packet);
if (ret < 0) {
fprintf(stderr, "Error encoding video frame:");
exit(1);
}
/* If size is zero, it means the image was buffered. */
if (got_packet) {
ret = write_frame(oc, &c->time_base, st, &pkt);
}
else {
if (flush) {
video_is_eof = 1;
}
ret = 0;
}
if (ret < 0) {
fprintf(stderr, "Error while writing video frame: ");
exit(1);
}
frame_count++;
}
static void close_video(AVFormatContext *oc, AVStream *st)
{
avcodec_close(st->codec);
av_free(src_picture.data[0]);
av_free(dst_picture.data[0]);
av_frame_free(&frame);
}
int _tmain(int argc, _TCHAR* argv[])
{
printf("starting...\n");
const char *filename = "rtsp://test:password#192.168.33.19:1935/ffmpeg/0";
AVOutputFormat *fmt;
AVFormatContext *oc;
AVStream *video_st;
AVCodec *video_codec;
double video_time;
int flush, ret;
/* Initialize libavcodec, and register all codecs and formats. */
av_register_all();
avformat_network_init();
AVOutputFormat* oFmt = av_oformat_next(NULL);
while (oFmt) {
if (oFmt->video_codec == VIDEO_CODEC_ID) {
break;
}
oFmt = av_oformat_next(oFmt);
}
if (!oFmt) {
printf("Could not find the required output format.\n");
exit(1);
}
/* allocate the output media context */
avformat_alloc_output_context2(&oc, oFmt, "rtsp", filename);
if (!oc) {
printf("Could not set the output media context.\n");
exit(1);
}
fmt = oc->oformat;
if (!fmt) {
printf("Could not create the output format.\n");
exit(1);
}
video_st = NULL;
cout << "Codec = " << avcodec_get_name(fmt->video_codec) << endl;
if (fmt->video_codec != AV_CODEC_ID_NONE)
{
video_st = add_stream(oc, &video_codec, fmt->video_codec);
}
/* Now that all the parameters are set, we can open the video codec and allocate the necessary encode buffers. */
if (video_st) {
open_video(oc, video_codec, video_st);
}
av_dump_format(oc, 0, filename, 1);
char errorBuff[80];
if (!(fmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&oc->pb, filename, AVIO_FLAG_WRITE);
if (ret < 0) {
fprintf(stderr, "Could not open outfile '%s': %s", filename, av_make_error_string(errorBuff, 80, ret));
return 1;
}
}
flush = 0;
while (video_st && !video_is_eof) {
/* Compute current video time. */
video_time = (video_st && !video_is_eof) ? video_st->pts.val * av_q2d(video_st->time_base) : INFINITY;
if (!flush && (!video_st || video_time >= STREAM_DURATION)) {
flush = 1;
}
if (video_st && !video_is_eof) {
write_video_frame(oc, video_st, flush);
}
}
if (video_st) {
close_video(oc, video_st);
}
if ((fmt->flags & AVFMT_NOFILE)) {
avio_close(oc->pb);
}
avformat_free_context(oc);
printf("finished.\n");
getchar();
return 0;
}
Does anyone have any insights about how the packet timestamps can be successfully set?
I solved the integer division by zero by building ffmpeg on my Windows instance and debugging the av_interleaved_write_frame call. Turns out it was the pts not being set on the video stream object that was causing the exception.
Adding the line below to the while loop in the main function fixed the problem:
video_st->pts.val += av_rescale_q(1, video_st->codec->time_base, video_st->time_base);
Here's a sample that works to get a H264 encoded dummy stream to a Wowza server via ffmpeg's RTSP pipeline.
// Roughly based on: https://ffmpeg.org/doxygen/trunk/muxing_8c-source.html
#include <chrono>
#include <thread>
#include <tchar.h>
extern "C"
{
#include <libavcodec\avcodec.h>
#include <libavformat\avformat.h>
#include <libavformat\avio.h>
#include <libswscale\swscale.h>
#include <libavutil\time.h>
}
#pragma comment(lib,"libavformat/libavformat.a")
#pragma comment(lib,"libavcodec/libavcodec.a")
#pragma comment(lib,"libavutil/libavutil.a")
#pragma comment(lib,"libswscale/libswscale.a")
#pragma comment(lib,"x264.lib")
#pragma comment(lib,"libswresample/libswresample.a")
using namespace std;
static int video_is_eof;
#define STREAM_DURATION 20
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */ //AV_PIX_FMT_NV12;
#define VIDEO_CODEC_ID CODEC_ID_H264
/* video output */
static AVFrame *frame;
static AVPicture src_picture, dst_picture;
/* Add an output stream. */
static AVStream *add_stream(AVFormatContext *oc, AVCodec **codec, enum AVCodecID codec_id)
{
AVCodecContext *c;
AVStream *st;
/* find the encoder */
*codec = avcodec_find_encoder(codec_id);
if (!(*codec)) {
av_log(NULL, AV_LOG_ERROR, "Could not find encoder for '%s'.\n", avcodec_get_name(codec_id));
}
else {
st = avformat_new_stream(oc, *codec);
if (!st) {
av_log(NULL, AV_LOG_ERROR, "Could not allocate stream.\n");
}
else {
st->id = oc->nb_streams - 1;
st->time_base.den = st->pts.den = 90000;
st->time_base.num = st->pts.num = 1;
c = st->codec;
c->codec_id = codec_id;
c->bit_rate = 400000;
c->width = 352;
c->height = 288;
c->time_base.den = STREAM_FRAME_RATE;
c->time_base.num = 1;
c->gop_size = 12; /* emit one intra frame every twelve frames at most */
c->pix_fmt = STREAM_PIX_FMT;
}
}
return st;
}
static int open_video(AVFormatContext *oc, AVCodec *codec, AVStream *st)
{
int ret;
AVCodecContext *c = st->codec;
/* open the codec */
ret = avcodec_open2(c, codec, NULL);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Could not open video codec.\n", avcodec_get_name(c->codec_id));
}
else {
/* allocate and init a re-usable frame */
frame = av_frame_alloc();
if (!frame) {
av_log(NULL, AV_LOG_ERROR, "Could not allocate video frame.\n");
ret = -1;
}
else {
frame->format = c->pix_fmt;
frame->width = c->width;
frame->height = c->height;
/* Allocate the encoded raw picture. */
ret = avpicture_alloc(&dst_picture, c->pix_fmt, c->width, c->height);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Could not allocate picture.\n");
}
else {
/* copy data and linesize picture pointers to frame */
*((AVPicture *)frame) = dst_picture;
}
}
}
return ret;
}
/* Prepare a dummy image. */
static void fill_yuv_image(AVPicture *pict, int frame_index, int width, int height)
{
int x, y, i;
i = frame_index;
/* Y */
for (y = 0; y < height; y++)
for (x = 0; x < width; x++)
pict->data[0][y * pict->linesize[0] + x] = x + y + i * 3;
/* Cb and Cr */
for (y = 0; y < height / 2; y++) {
for (x = 0; x < width / 2; x++) {
pict->data[1][y * pict->linesize[1] + x] = 128 + y + i * 2;
pict->data[2][y * pict->linesize[2] + x] = 64 + x + i * 5;
}
}
}
static int write_video_frame(AVFormatContext *oc, AVStream *st, int frameCount)
{
int ret = 0;
AVCodecContext *c = st->codec;
fill_yuv_image(&dst_picture, frameCount, c->width, c->height);
AVPacket pkt = { 0 };
int got_packet;
av_init_packet(&pkt);
/* encode the image */
frame->pts = frameCount;
ret = avcodec_encode_video2(c, &pkt, frame, &got_packet);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Error encoding video frame.\n");
}
else {
if (got_packet) {
pkt.stream_index = st->index;
pkt.pts = av_rescale_q_rnd(pkt.pts, c->time_base, st->time_base, AVRounding(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
ret = av_write_frame(oc, &pkt);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Error while writing video frame.\n");
}
}
}
return ret;
}
int _tmain(int argc, _TCHAR* argv[])
{
printf("starting...\n");
const char *url = "rtsp://test:password#192.168.33.19:1935/ffmpeg/0";
//const char *url = "rtsp://192.168.33.19:1935/ffmpeg/0";
AVFormatContext *outContext;
AVStream *video_st;
AVCodec *video_codec;
int ret = 0, frameCount = 0;
av_log_set_level(AV_LOG_DEBUG);
//av_log_set_level(AV_LOG_TRACE);
av_register_all();
avformat_network_init();
avformat_alloc_output_context2(&outContext, NULL, "rtsp", url);
if (!outContext) {
av_log(NULL, AV_LOG_FATAL, "Could not allocate an output context for '%s'.\n", url);
goto end;
}
if (!outContext->oformat) {
av_log(NULL, AV_LOG_FATAL, "Could not create the output format for '%s'.\n", url);
goto end;
}
video_st = add_stream(outContext, &video_codec, VIDEO_CODEC_ID);
/* Now that all the parameters are set, we can open the video codec and allocate the necessary encode buffers. */
if (video_st) {
av_log(NULL, AV_LOG_DEBUG, "Video stream codec %s.\n ", avcodec_get_name(video_st->codec->codec_id));
ret = open_video(outContext, video_codec, video_st);
if (ret < 0) {
av_log(NULL, AV_LOG_FATAL, "Open video stream failed.\n");
goto end;
}
}
else {
av_log(NULL, AV_LOG_FATAL, "Add video stream for the codec '%s' failed.\n", avcodec_get_name(VIDEO_CODEC_ID));
goto end;
}
av_dump_format(outContext, 0, url, 1);
ret = avformat_write_header(outContext, NULL);
if (ret != 0) {
av_log(NULL, AV_LOG_ERROR, "Failed to connect to RTSP server for '%s'.\n", url);
goto end;
}
printf("Press any key to start streaming...\n");
getchar();
auto startSend = std::chrono::system_clock::now();
while (video_st) {
frameCount++;
auto startFrame = std::chrono::system_clock::now();
ret = write_video_frame(outContext, video_st, frameCount);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Write video frame failed.\n", url);
goto end;
}
auto streamDuration = std::chrono::duration_cast<chrono::milliseconds>(std::chrono::system_clock::now() - startSend).count();
printf("Elapsed time %ldms, video stream pts %ld.\n", streamDuration, video_st->pts.val);
if (streamDuration / 1000.0 > STREAM_DURATION) {
break;
}
else {
auto frameDuration = std::chrono::duration_cast<chrono::milliseconds>(std::chrono::system_clock::now() - startFrame).count();
std::this_thread::sleep_for(std::chrono::milliseconds((long)(1000.0 / STREAM_FRAME_RATE - frameDuration)));
}
}
if (video_st) {
avcodec_close(video_st->codec);
av_free(src_picture.data[0]);
av_free(dst_picture.data[0]);
av_frame_free(&frame);
}
avformat_free_context(outContext);
end:
printf("finished.\n");
getchar();
return 0;
}

Resources