How to use alsa to play sounds simultaneously in c? - multithreading

I'm using alsa lib in c under linux.
I'd like to load several wav files and play them depending on some test conditions.
I'm using the following code, but it needs to be improved:
// A simple C example to play a mono or stereo, 16-bit 44KHz
// WAVE file using ALSA. This goes directly to the first
// audio card (ie, its first set of audio out jacks). It
// uses the snd_pcm_writei() mode of outputting waveform data,
// blocking.
//
// Compile as so to create "alsawave":
// gcc -o alsawave alsawave.c -lasound
//
// Run it from a terminal, specifying the name of a WAVE file to play:
// ./alsawave MyWaveFile.wav
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
// Include the ALSA .H file that defines ALSA functions/data
#include <alsa/asoundlib.h>
#pragma pack (1)
/////////////////////// WAVE File Stuff /////////////////////
// An IFF file header looks like this
typedef struct _FILE_head
{
unsigned char ID[4]; // could be {'R', 'I', 'F', 'F'} or {'F', 'O', 'R', 'M'}
unsigned int Length; // Length of subsequent file (including remainder of header). This is in
// Intel reverse byte order if RIFF, Motorola format if FORM.
unsigned char Type[4]; // {'W', 'A', 'V', 'E'} or {'A', 'I', 'F', 'F'}
} FILE_head;
// An IFF chunk header looks like this
typedef struct _CHUNK_head
{
unsigned char ID[4]; // 4 ascii chars that is the chunk ID
unsigned int Length; // Length of subsequent data within this chunk. This is in Intel reverse byte
// order if RIFF, Motorola format if FORM. Note: this doesn't include any
// extra byte needed to pad the chunk out to an even size.
} CHUNK_head;
// WAVE fmt chunk
typedef struct _FORMAT {
short wFormatTag;
unsigned short wChannels;
unsigned int dwSamplesPerSec;
unsigned int dwAvgBytesPerSec;
unsigned short wBlockAlign;
unsigned short wBitsPerSample;
// Note: there may be additional fields here, depending upon wFormatTag
} FORMAT;
#pragma pack()
// Size of the audio card hardware buffer. Here we want it
// set to 1024 16-bit sample points. This is relatively
// small in order to minimize latency. If you have trouble
// with underruns, you may need to increase this, and PERIODSIZE
// (trading off lower latency for more stability)
#define BUFFERSIZE (2*1024)
// How many sample points the ALSA card plays before it calls
// our callback to fill some more of the audio card's hardware
// buffer. Here we want ALSA to call our callback after every
// 64 sample points have been played
#define PERIODSIZE (2*64)
// Handle to ALSA (audio card's) playback port
snd_pcm_t *PlaybackHandle;
// Handle to our callback thread
snd_async_handler_t *CallbackHandle;
// Points to loaded WAVE file's data
unsigned char *WavePtr;
// Size (in frames) of loaded WAVE file's data
snd_pcm_uframes_t WaveSize;
// Sample rate
unsigned short WaveRate;
// Bit resolution
unsigned char WaveBits;
// Number of channels in the wave file
unsigned char WaveChannels;
// The name of the ALSA port we output to. In this case, we're
// directly writing to hardware card 0,0 (ie, first set of audio
// outputs on the first audio card)
static const char SoundCardPortName[] = "default";
// For WAVE file loading
static const unsigned char Riff[4] = { 'R', 'I', 'F', 'F' };
static const unsigned char Wave[4] = { 'W', 'A', 'V', 'E' };
static const unsigned char Fmt[4] = { 'f', 'm', 't', ' ' };
static const unsigned char Data[4] = { 'd', 'a', 't', 'a' };
/********************** compareID() *********************
* Compares the passed ID str (ie, a ptr to 4 Ascii
* bytes) with the ID at the passed ptr. Returns TRUE if
* a match, FALSE if not.
*/
static unsigned char compareID(const unsigned char * id, unsigned char * ptr)
{
register unsigned char i = 4;
while (i--)
{
if ( *(id)++ != *(ptr)++ ) return(0);
}
return(1);
}
/********************** waveLoad() *********************
* Loads a WAVE file.
*
* fn = Filename to load.
*
* RETURNS: 0 if success, non-zero if not.
*
* NOTE: Sets the global "WavePtr" to an allocated buffer
* containing the wave data, and "WaveSize" to the size
* in sample points.
*/
static unsigned char waveLoad(const char *fn)
{
const char *message;
FILE_head head;
register int inHandle;
if ((inHandle = open(fn, O_RDONLY)) == -1)
message = "didn't open";
// Read in IFF File header
else
{
if (read(inHandle, &head, sizeof(FILE_head)) == sizeof(FILE_head))
{
// Is it a RIFF and WAVE?
if (!compareID(&Riff[0], &head.ID[0]) || !compareID(&Wave[0], &head.Type[0]))
{
message = "is not a WAVE file";
goto bad;
}
// Read in next chunk header
while (read(inHandle, &head, sizeof(CHUNK_head)) == sizeof(CHUNK_head))
{
// ============================ Is it a fmt chunk? ===============================
if (compareID(&Fmt[0], &head.ID[0]))
{
FORMAT format;
// Read in the remainder of chunk
if (read(inHandle, &format.wFormatTag, sizeof(FORMAT)) != sizeof(FORMAT)) break;
// Can't handle compressed WAVE files
if (format.wFormatTag != 1)
{
message = "compressed WAVE not supported";
goto bad;
}
WaveBits = (unsigned char)format.wBitsPerSample;
WaveRate = (unsigned short)format.dwSamplesPerSec;
WaveChannels = format.wChannels;
}
// ============================ Is it a data chunk? ===============================
else if (compareID(&Data[0], &head.ID[0]))
{
// Size of wave data is head.Length. Allocate a buffer and read in the wave data
if (!(WavePtr = (unsigned char *)malloc(head.Length)))
{
message = "won't fit in RAM";
goto bad;
}
if (read(inHandle, WavePtr, head.Length) != head.Length)
{
free(WavePtr);
break;
}
// Store size (in frames)
WaveSize = (head.Length * 8) / ((unsigned int)WaveBits * (unsigned int)WaveChannels);
close(inHandle);
return(0);
}
// ============================ Skip this chunk ===============================
else
{
if (head.Length & 1) ++head.Length; // If odd, round it up to account for pad byte
lseek(inHandle, head.Length, SEEK_CUR);
}
}
}
message = "is a bad WAVE file";
bad: close(inHandle);
}
printf("%s %s\n", fn, message);
return(1);
}
/********************** play_audio() **********************
* Plays the loaded waveform.
*
* NOTE: ALSA sound card's handle must be in the global
* "PlaybackHandle". A pointer to the wave data must be in
* the global "WavePtr", and its size of "WaveSize".
*/
static void play_audio(void)
{
register snd_pcm_uframes_t count, frames;
// Output the wave data
count = 0;
do
{
frames = snd_pcm_writei(PlaybackHandle, WavePtr + count, WaveSize - count);
// If an error, try to recover from it
if (frames < 0)
frames = snd_pcm_recover(PlaybackHandle, frames, 0);
if (frames < 0)
{
printf("Error playing wave: %s\n", snd_strerror(frames));
break;
}
// Update our pointer
count += frames;
} while (count < WaveSize);
// Wait for playback to completely finish
//if (count == WaveSize)
//snd_pcm_drain(PlaybackHandle);
}
/*********************** free_wave_data() *********************
* Frees any wave data we loaded.
*
* NOTE: A pointer to the wave data be in the global
* "WavePtr".
*/
static void free_wave_data(void)
{
if (WavePtr) free(WavePtr);
WavePtr = 0;
}
int main(int argc, char **argv)
{
// No wave data loaded yet
WavePtr = 0;
if (argc < 2)
printf("You must supply the name of a 16-bit mono WAVE file to play\n");
// Load the wave file
else if (!waveLoad(argv[1]))
{
register int err;
// Open audio card we wish to use for playback
if ((err = snd_pcm_open(&PlaybackHandle, &SoundCardPortName[0], SND_PCM_STREAM_PLAYBACK, 0)) < 0)
printf("Can't open audio %s: %s\n", &SoundCardPortName[0], snd_strerror(err));
else
{
switch (WaveBits)
{
case 8:
err = SND_PCM_FORMAT_U8;
break;
case 16:
err = SND_PCM_FORMAT_S16;
break;
case 24:
err = SND_PCM_FORMAT_S24;
break;
case 32:
err = SND_PCM_FORMAT_S32;
break;
}
// Set the audio card's hardware parameters (sample rate, bit resolution, etc)
if ((err = snd_pcm_set_params(PlaybackHandle, err, SND_PCM_ACCESS_RW_INTERLEAVED, WaveChannels, WaveRate, 1, 100000)) < 0)
printf("Can't set sound parameters: %s\n", snd_strerror(err));
// Play the waveform
else
play_audio();
int i;
usleep(10000);
play_audio();
play_audio();
// Close sound card
snd_pcm_close(PlaybackHandle);
}
}
// Free the WAVE data
free_wave_data();
return(0);
}
As I would like to play multiple sounds simultaneously, I started to try to play the same sound more than once, so I commented the following lines:
if (count == WaveSize)
snd_pcm_drain(PlaybackHandle);
in the play_audio function.
Unfortunately, that doesn't really works, because if I try to play the same sound more than once, it works, but, if I insert a long delay before I play the sound, nothing is played.
for instance, in the main function
play_audio();
usleep(10000);
play_audio();
play_audio();
works, and I can hear the same sound three times. But, if I use usleep(100000), I hear the sound only once.
Another problem is that it has to wait for the first sound to end before it starts to play the next one.
So, I'd like to be able to send more than one sound, and play several sounds at the same time. I would like to mix them manually (it's not really difficult). The main function will contain a while loop with some tests to determine which sound(s) need to be played.
I thought about putting play_audio in a thread and run it in an infinite loop, and have the main thread that modifies (mix, etc.) WavePtr.
I just don't really know if this is the right way, or if there is a more efficient method.
Any suggestions? Thanks.

Related

Alsa buffer underrun

I am trying to write random noise to to a device and allow my loop to sleep when I have written enough data. My understanding is that for each call to snd_pcm_writei I am writing 162 bytes (81 frames) which at 8khz rate and 16bit format it should be enough audio for ~10ms. I have verified that alsa does tell me I have written 81 frames.
I would expect that I can then sleep for a short amount of time before waking up and pushing the next 10 ms worth of data. However when I sleep for any amount - even a single ms - I start to get buffer underrun errors.
Obviously I have made an incorrect assumption somewhere. Can anyone point me to what I may be missing? I have removed most error checking to shorten the code - but there are no errors initializing the alsa system on my end. I would like to be able to push 10ms of audio and sleep (even for 1 ms) before pushing the next 10ms.
#include <alsa/asoundlib.h>
#include <spdlog/spdlog.h>
int main(int argc, char **argv) {
snd_pcm_t* handle;
snd_pcm_hw_params_t* hw;
unsigned int rate = 8000;
unsigned long periodSize = rate / 100; //period every 10 ms
int err = snd_pcm_open(&handle, "default", SND_PCM_STREAM_PLAYBACK, 0);
snd_pcm_hw_params_malloc(&hw);
snd_pcm_hw_params_any(handle, hw);
snd_pcm_hw_params_set_access(handle, hw, SND_PCM_ACCESS_RW_INTERLEAVED);
snd_pcm_hw_params_set_format(handle, hw, SND_PCM_FORMAT_S16_LE);
snd_pcm_hw_params_set_rate(handle, hw, rate, 0);
snd_pcm_hw_params_set_channels(handle, hw, 1);
int dir = 1;
snd_pcm_hw_params_set_period_size_near(handle, hw, &periodSize, &dir);
snd_pcm_hw_params(handle, hw);
snd_pcm_uframes_t frames;
snd_pcm_hw_params_get_period_size(hw, &frames, &dir);
int size = frames * 2; // two bytes a sample
char* buffer = (char*)malloc(size);
unsigned int periodTime;
snd_pcm_hw_params_get_period_time(hw,&periodTime, &dir);
snd_pcm_hw_params_free(hw);
snd_pcm_prepare(handle);
char* randomNoise = new char[size];
for(int i = 0; i < size; i++)
randomNoise[i] = random() % 0xFF;
while(true) {
err = snd_pcm_writei(handle, randomNoise, size/2);
if(err > 0) {
spdlog::info("Write {} frames", err);
} else {
spdlog::error("Error write {}\n", snd_strerror(err));
snd_pcm_recover(handle, err, 0);
continue;
}
usleep(1000); // <---- This is what causes the buffer underrun
}
}
Try to put in /etc/pulse/daemon.conf :
default-fragments = 5
default-fragment-size-msec = 2
and restart linux.
What I don't understand is why you write a buffer of size "size" to the device, and in the approximate calculations of time you rely on the "periodSize" declared by you. Then write a buffer with the size "periodSize" to the device.

Capture image with V4L2 on jetson TX2

To explain my process, find below a diagram:
I am working on computed tomography scanner. I use jetson TX2 for image acquisition and pre-processing.
From the jetson, I control the turn table and the camera. The camera is the FSM-IMX304m. I need to access the raw pointer. For that, I need to control the camera using V4L2 (we advise not use libargus to access raw pointer, because it is store in the ISP and the ISP compress data .. Can you confirm it ?). My first problem is about the documentation about v4l2, I didn't find a clear documentation for the C++ API .. I need to control:
exposure time;
gain;
function to clear the buffer.
I found a sample on internet, see how V4L2 works :
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <linux/ioctl.h>
#include <linux/types.h>
#include <linux/v4l2-common.h>
#include <linux/v4l2-controls.h>
#include <linux/videodev2.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <string.h>
#include <fstream>
#include <string>
using namespace std;
int main() {
// 1. Open the device
int fd; // A file descriptor to the video device
fd = open("/dev/video0",O_RDWR);
if(fd < 0){
perror("Failed to open device, OPEN");
return 1;
}
// 2. Ask the device if it can capture frames
v4l2_capability capability;
if(ioctl(fd, VIDIOC_QUERYCAP, &capability) < 0){
// something went wrong... exit
perror("Failed to get device capabilities, VIDIOC_QUERYCAP");
return 1;
}
// 3. Set Image format
v4l2_format imageFormat;
imageFormat.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
imageFormat.fmt.pix.width = 1024;
imageFormat.fmt.pix.height = 1024;
imageFormat.fmt.pix.pixelformat = V4L2_PIX_FMT_MJPEG;
imageFormat.fmt.pix.field = V4L2_FIELD_NONE;
// tell the device you are using this format
if(ioctl(fd, VIDIOC_S_FMT, &imageFormat) < 0){
perror("Device could not set format, VIDIOC_S_FMT");
return 1;
}
// 4. Request Buffers from the device
v4l2_requestbuffers requestBuffer = {0};
requestBuffer.count = 1; // one request buffer
requestBuffer.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; // request a buffer which we can use for capturing frames
requestBuffer.memory = V4L2_MEMORY_MMAP;
if(ioctl(fd, VIDIOC_REQBUFS, &requestBuffer) < 0){
perror("Could not request buffer from device, VIDIOC_REQBUFS");
return 1;
}
// 5. Query the buffer to get raw data ie. ask for the you requested buffer
// and allocate memory for it
v4l2_buffer queryBuffer = {0};
queryBuffer.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
queryBuffer.memory = V4L2_MEMORY_MMAP;
queryBuffer.index = 0;
if(ioctl(fd, VIDIOC_QUERYBUF, &queryBuffer) < 0){
perror("Device did not return the buffer information, VIDIOC_QUERYBUF");
return 1;
}
// use a pointer to point to the newly created buffer
// mmap() will map the memory address of the device to
// an address in memory
char* buffer = (char*)mmap(NULL, queryBuffer.length, PROT_READ | PROT_WRITE, MAP_SHARED,
fd, queryBuffer.m.offset);
memset(buffer, 0, queryBuffer.length);
// 6. Get a frame
// Create a new buffer type so the device knows which buffer we are talking about
v4l2_buffer bufferinfo;
memset(&bufferinfo, 0, sizeof(bufferinfo));
bufferinfo.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
bufferinfo.memory = V4L2_MEMORY_MMAP;
bufferinfo.index = 0;
// Activate streaming
int type = bufferinfo.type;
if(ioctl(fd, VIDIOC_STREAMON, &type) < 0){
perror("Could not start streaming, VIDIOC_STREAMON");
return 1;
}
/***************************** Begin looping here *********************/
// Queue the buffer
if(ioctl(fd, VIDIOC_QBUF, &bufferinfo) < 0){
perror("Could not queue buffer, VIDIOC_QBUF");
return 1;
}
// Dequeue the buffer
if(ioctl(fd, VIDIOC_DQBUF, &bufferinfo) < 0){
perror("Could not dequeue the buffer, VIDIOC_DQBUF");
return 1;
}
// Frames get written after dequeuing the buffer
cout << "Buffer has: " << (double)bufferinfo.bytesused / 1024
<< " KBytes of data" << endl;
// Write the data out to file
ofstream outFile;
outFile.open("webcam_output.jpeg", ios::binary| ios::app);
int bufPos = 0, outFileMemBlockSize = 0; // the position in the buffer and the amount to copy from
// the buffer
int remainingBufferSize = bufferinfo.bytesused; // the remaining buffer size, is decremented by
// memBlockSize amount on each loop so we do not overwrite the buffer
char* outFileMemBlock = NULL; // a pointer to a new memory block
int itr = 0; // counts thenumber of iterations
while(remainingBufferSize > 0) {
bufPos += outFileMemBlockSize; // increment the buffer pointer on each loop
// initialise bufPos before outFileMemBlockSize so we can start
// at the beginning of the buffer
outFileMemBlockSize = 1024; // set the output block size to a preferable size. 1024 :)
outFileMemBlock = new char[sizeof(char) * outFileMemBlockSize];
// copy 1024 bytes of data starting from buffer+bufPos
memcpy(outFileMemBlock, buffer+bufPos, outFileMemBlockSize);
outFile.write(outFileMemBlock,outFileMemBlockSize);
// calculate the amount of memory left to read
// if the memory block size is greater than the remaining
// amount of data we have to copy
if(outFileMemBlockSize > remainingBufferSize)
outFileMemBlockSize = remainingBufferSize;
// subtract the amount of data we have to copy
// from the remaining buffer size
remainingBufferSize -= outFileMemBlockSize;
// display the remaining buffer size
cout << itr++ << " Remaining bytes: "<< remainingBufferSize << endl;
delete outFileMemBlock;
}
// Close the file
outFile.close();
/******************************** end looping here **********************/
// end streaming
if(ioctl(fd, VIDIOC_STREAMOFF, &type) < 0){
perror("Could not end streaming, VIDIOC_STREAMOFF");
return 1;
}
close(fd);
return 0;
}
On the jetson, the code compile perfectly, but I can't run the code. It is blocked at this step :
// Dequeue the buffer
if(ioctl(fd, VIDIOC_DQBUF, &bufferinfo) < 0){
perror("Could not dequeue the buffer, VIDIOC_DQBUF");
return 1;
}
It is like the code is blocked in an endless loop. I have tested the code on my personal computer which runs Ubuntu 18.04, and the sample works well.
I do not have this sensor, but I assume that:
Your pixel format is incorrectly set imageFormat.fmt.pix.pixelformat = V4L2_PIX_FMT_MJPEG;
Most likely there should be 12-bit raw data from this sensor V4L2_PIX_FMT_Y12 or one of these options (Mono8/10/12/16, Bayer8/10/12/16, RGB8, YUV422, YUV411).
You can view the available formats in the Linux kernel here https://elixir.bootlin.com/linux/v4.9.237/source/include/uapi/linux/videodev2.h#L499
Check the documentation for your sensor.
Since Nvidia developers have extended the v4l2 subsystem, you need to use the following controls to adjust exposure and gain: TEGRA_CAMERA_CID_EXPOSURE, TEGRA_CAMERA_CID_GAIN. See file tegra-v4l2-camera.h
And also check the sensor controls:
v4l2-ctl --list-ctrls
....
gain 0x009a2009 (int64): min = 0 max = 480 step = 1 default = 0 value = 0 flags = slider
exposure 0x009a200a (int64): min = 28 max = 1000000 step = 1 default = 27879 value = 28 flags = slider
.....
Also examples of receiving raw data from the camera can be seen in examples from Nvidia
https://docs.nvidia.com/jetson/l4t-multimedia/mmapi_build.html

Is there a way to speed up audio .wav file using some SDL_Mixer's function?

I am making a simple game whose audio speed should increase as the player is approaching the end of the level it is playing. So now I was wondering if there was a way to do this using SDL_Mixer. If SDL_Mixer is not the way to go could you please tell me how could I make this change in the audio file itself to make it faster. I am working with a 8-bit .wav file with 2 channels at the samplerate of 22050.
According to this forum here: https://forums.libsdl.org/viewtopic.php?p=44663, you can use a different library called "SoLoud" to change the playback speed of your sounds on the fly. You can get/see more details on SoLoud here: http://sol.gfxile.net/soloud/. From what I can tell, you cannot do this using SDL2, and SoLoud seems easy enough to use, so that would be my suggestion.
A few years back I was trying to achieve something very similar and, after a lot of web search, I came up with this solution, involving using Mix_RegisterEffect function, which got close:
#include <SDL2/SDL.h>
#include <SDL2/SDL_mixer.h>
#include <iostream>
#include <cstdlib>
#include <cmath>
/* global vars */
Uint16 audioFormat; // current audio format constant
int audioFrequency, // frequency rate of the current audio format
audioChannelCount, // number of channels of the current audio format
audioAllocatedMixChannelsCount; // number of mix channels allocated
static inline Uint16 formatSampleSize(Uint16 format)
{
return (format & 0xFF) / 8;
}
// Get chunk time length (in ms) given its size and current audio format
static int computeChunkLengthMillisec(int chunkSize)
{
/* bytes / samplesize == sample points */
const Uint32 points = chunkSize / formatSampleSize(audioFormat);
/* sample points / channels == sample frames */
const Uint32 frames = (points / audioChannelCount);
/* (sample frames * 1000) / frequency == play length, in ms */
return ((frames * 1000) / audioFrequency);
}
// Custom handler object to control which part of the Mix_Chunk's audio data will be played, with which pitch-related modifications.
// This needed to be a template because the actual Mix_Chunk's data format may vary (AUDIO_U8, AUDIO_S16, etc) and the data type varies with it (Uint8, Sint16, etc)
// The AudioFormatType should be the data type that is compatible with the current SDL_mixer-initialized audio format.
template<typename AudioFormatType>
struct PlaybackSpeedEffectHandler
{
const AudioFormatType* const chunkData; // pointer to the chunk sample data (as array)
const float& speedFactor; // the playback speed factor
int position; // current position of the sound, in ms
const int duration; // the duration of the sound, in ms
const int chunkSize; // the size of the sound, as a number of indexes (or sample points). thinks of this as a array size when using the proper array type (instead of just Uint8*).
const bool loop; // flags whether playback should stay looping
const bool attemptSelfHalting; // flags whether playback should be halted by this callback when playback is finished
bool altered; // true if this playback has been pitched by this handler
PlaybackSpeedEffectHandler(const Mix_Chunk& chunk, const float& speed, bool loop, bool trySelfHalt)
: chunkData(reinterpret_cast<AudioFormatType*>(chunk.abuf)), speedFactor(speed),
position(0), duration(computeChunkLengthMillisec(chunk.alen)),
chunkSize(chunk.alen / formatSampleSize(audioFormat)),
loop(loop), attemptSelfHalting(trySelfHalt), altered(false)
{}
// processing function to be able to change chunk speed/pitch.
void modifyStreamPlaybackSpeed(int mixChannel, void* stream, int length)
{
AudioFormatType* buffer = static_cast<AudioFormatType*>(stream);
const int bufferSize = length / sizeof(AudioFormatType); // buffer size (as array)
const int bufferDuration = computeChunkLengthMillisec(length); // buffer time duration
const float speedFactor = this->speedFactor; // take a "snapshot" of speed factor
// if there is still sound to be played
if(position < duration || loop)
{
// if playback is unaltered and pitch is required (for the first time)
if(!altered && speedFactor != 1.0f)
altered = true; // flags playback modification and proceed to the pitch routine.
if(altered) // if unaltered, this pitch routine is skipped
{
const float delta = 1000.0/audioFrequency, // normal duration of each sample
vdelta = delta*speedFactor; // virtual stretched duration, scaled by 'speedFactor'
for(int i = 0; i < bufferSize; i += audioChannelCount)
{
const int j = i/audioChannelCount; // j goes from 0 to size/channelCount, incremented 1 by 1
const float x = position + j*vdelta; // get "virtual" index. its corresponding value will be interpolated.
const int k = floor(x / delta); // get left index to interpolate from original chunk data (right index will be this plus 1)
const float proportion = (x / delta) - k; // get the proportion of the right value (left will be 1.0 minus this)
// usually just 2 channels: 0 (left) and 1 (right), but who knows...
for(int c = 0; c < audioChannelCount; c++)
{
// check if k will be within bounds
if(k*audioChannelCount + audioChannelCount - 1 < chunkSize || loop)
{
AudioFormatType leftValue = chunkData[( k * audioChannelCount + c) % chunkSize],
rightValue = chunkData[((k+1) * audioChannelCount + c) % chunkSize];
// put interpolated value on 'data' (linear interpolation)
buffer[i + c] = (1-proportion)*leftValue + proportion*rightValue;
}
else // if k will be out of bounds (chunk bounds), it means we already finished; thus, we'll pass silence
{
buffer[i + c] = 0;
}
}
}
}
// update position
position += bufferDuration * speedFactor; // this is not exact since a frame may play less than its duration when finished playing, but its simpler
// reset position if looping
if(loop) while(position > duration)
position -= duration;
}
else // if we already played the whole sound but finished earlier than expected by SDL_mixer (due to faster playback speed)
{
// set silence on the buffer since Mix_HaltChannel() poops out some of it for a few ms.
for(int i = 0; i < bufferSize; i++)
buffer[i] = 0;
if(attemptSelfHalting)
Mix_HaltChannel(mixChannel); // XXX unsafe call, since it locks audio; but no safer solution was found yet...
}
}
// Mix_EffectFunc_t callback that redirects to handler method (handler passed via userData)
static void mixEffectFuncCallback(int channel, void* stream, int length, void* userData)
{
static_cast<PlaybackSpeedEffectHandler*>(userData)->modifyStreamPlaybackSpeed(channel, stream, length);
}
// Mix_EffectDone_t callback that deletes the handler at the end of the effect usage (handler passed via userData)
static void mixEffectDoneCallback(int, void *userData)
{
delete static_cast<PlaybackSpeedEffectHandler*>(userData);
}
// function to register a handler to this channel for the next playback.
static void registerEffect(int channel, const Mix_Chunk& chunk, const float& speed, bool loop, bool trySelfHalt)
{
Mix_RegisterEffect(channel, mixEffectFuncCallback, mixEffectDoneCallback, new PlaybackSpeedEffectHandler(chunk, speed, loop, trySelfHalt));
}
};
// Register playback speed effect handler according to the current audio format; effect valid for a single playback; if playback is looped, lasts until it's halted
void setupPlaybackSpeedEffect(const Mix_Chunk* const chunk, const float& speed, int channel, bool loop=false, bool trySelfHalt=false)
{
// select the register function for the current audio format and register the effect using the compatible handlers
// XXX is it correct to behave the same way to all S16 and U16 formats? Should we create case statements for AUDIO_S16SYS, AUDIO_S16LSB, AUDIO_S16MSB, etc, individually?
switch(audioFormat)
{
case AUDIO_U8: PlaybackSpeedEffectHandler<Uint8 >::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
case AUDIO_S8: PlaybackSpeedEffectHandler<Sint8 >::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
case AUDIO_U16: PlaybackSpeedEffectHandler<Uint16>::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
default:
case AUDIO_S16: PlaybackSpeedEffectHandler<Sint16>::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
case AUDIO_S32: PlaybackSpeedEffectHandler<Sint32>::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
case AUDIO_F32: PlaybackSpeedEffectHandler<float >::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
}
}
// example
// run the executable passing an filename of a sound file that SDL_mixer is able to open (ogg, wav, ...)
int main(int argc, char** argv)
{
if(argc < 2) { std::cout << "missing argument" << std::endl; return 0; }
SDL_Init(SDL_INIT_AUDIO);
Mix_OpenAudio(MIX_DEFAULT_FREQUENCY, MIX_DEFAULT_FORMAT, MIX_DEFAULT_CHANNELS, 4096);
Mix_QuerySpec(&audioFrequency, &audioFormat, &audioChannelCount); // query specs
audioAllocatedMixChannelsCount = Mix_AllocateChannels(MIX_CHANNELS);
float speed = 1.0;
Mix_Chunk* chunk = Mix_LoadWAV(argv[1]);
if(chunk != NULL)
{
const int channel = Mix_PlayChannelTimed(-1, chunk, -1, 8000);
setupPlaybackSpeedEffect(chunk, speed, channel, true);
// loop for 8 seconds, changing the pitch dynamically
while(SDL_GetTicks() < 8000)
speed = 1 + 0.25*sin(0.001*SDL_GetTicks());
}
else
std::cout << "no data" << std::endl;
Mix_FreeChunk(chunk);
Mix_CloseAudio();
Mix_Quit();
SDL_Quit();
return EXIT_SUCCESS;
}
While this works, it's not a perfect solution, since the result has some artifacts (crackling) in most cases, which I wasn't able to figure out why.
Github gist I created for this a while ago.

sndio sio_onmove not calling back.

I'm trying to write a fullduplex test that copies audio in to audio out. sio_onmove does not get called. I have no idea why. Here's my code so far:
#include <stdio.h>
#include <stdlib.h>
#include <sndio.h>
unsigned char buf[0xffff];
struct sio_hdl *hdl;
void cb(void *arg, int delta) {
int l;
printf("call %d\n", delta);
for(;;) {
l = sio_read(hdl, buf, delta);
if(l==0) break;
sio_write(hdl, buf, l);
}
}
int main(void) {
int m, i;
struct sio_par par;
struct sio_cap cap;
hdl = sio_open("rsnd/0", SIO_PLAY | SIO_REC , 1);
sio_getcap(hdl, &cap);
sio_initpar( &par);
par.bits = cap.enc[0].bits;
par.bps = cap.enc[0].bps;
par.sig = cap.enc[0].sig;
par.le = cap.enc[0].le;
par.msb = cap.enc[0].msb;
par.rchan=cap.rchan[0];
par.pchan=cap.pchan[0];
par.rate =cap.rate[0];
par.appbufsz = 1024;
sio_setpar(hdl, &par);
sio_onmove(hdl, cb, NULL);
sio_start(hdl);
for(;;)
sleep(1);
}
I'm initializing rsnd/0 for recording and play back. The parameters I'm initializing from a getcap call. I'm then setting cb as the callback for onmove. I then start audio. From there I loop forever doing nothing
The sio_onmove() call-back is called either from sio_revents() if non-blocking i/o is used or from blocking sio_read() or sio_write().
As above program calls sleep(1) instead, the call-back is never called.
AFAIU, to do the full-duplex test, you could use blocking i/o (set to 0 last argument of the sio_open() function) and do the following steps:
call sio_initpar() to initialize a sio_par structure, as you do
set your preferred parameters in the sio_par structure
call sio_setpar() to submit them to the device. devices exposed through the server (ex. "snd/0") will accept any parameters, while raw devices (ex. "rsnd/0") pick something close to whatever the hardware supports.
call sio_getpar() to get the parameters the device accepted, this is needed to get the device buffer size
possibly check if they are usable by your program
call sio_start()
prime the play buffer by writing par.bufsz samples with sio_write(). This corresponds to: par.bufsz * par.pchan * par.bps bytes.
At this stage, device starts and you could do the main-loop as with the following pseudo-code:
unsigned char *data;
size_t n, todo, blksz;
blksz = par.round * par.rchan * par.bps;
for (;;) {
/* read one block */
data = buf;
todo = blksz;
while (todo > 0) {
n = sio_read(hdl, data, todo);
if (n == 0)
errx(1, "failed");
todo -= n;
data += n;
}
/* write one block */
n = sio_write(hdl, buf, blksz);
if (n != blksz)
errx(1, "failed");
}
The sio_onmove() call-back is not needed for pure audio programs. It's only useful to synchronize non-audio events (ex video, midi messages) to the audio stream.

Encoding FLOAT PCM to OGG using libav

I am currently trying to convert a raw PCM Float buffer to an OGG encoded file. I tried several library to do the encoding process and I finally chose libavcodec.
What I precisely want to do is get the float buffer ([-1;1]) provided by my audio library and turn it to a char buffer of encoded ogg data.
I managed to encode the float buffer to a buffer of encoded MP2 with this (proof of concept) code:
static AVCodec *codec;
static AVCodecContext *c;
static AVPacket pkt;
static uint16_t* samples;
static AVFrame* frame;
static int frameEncoded;
FILE *file;
int main(int argc, char *argv[])
{
file = fopen("file.ogg", "w+");
long ret;
avcodec_register_all();
codec = avcodec_find_encoder(AV_CODEC_ID_MP2);
if (!codec) {
fprintf(stderr, "codec not found\n");
exit(1);
}
c = avcodec_alloc_context3(NULL);
c->bit_rate = 256000;
c->sample_rate = 44100;
c->channels = 2;
c->sample_fmt = AV_SAMPLE_FMT_S16;
c->channel_layout = AV_CH_LAYOUT_STEREO;
/* open it */
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
exit(1);
}
/* frame containing input raw audio */
frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Could not allocate audio frame\n");
exit(1);
}
frame->nb_samples = c->frame_size;
frame->format = c->sample_fmt;
frame->channel_layout = c->channel_layout;
/* the codec gives us the frame size, in samples,
* we calculate the size of the samples buffer in bytes */
int buffer_size = av_samples_get_buffer_size(NULL, c->channels, c->frame_size,
c->sample_fmt, 0);
if (buffer_size < 0) {
fprintf(stderr, "Could not get sample buffer size\n");
exit(1);
}
samples = av_malloc(buffer_size);
if (!samples) {
fprintf(stderr, "Could not allocate %d bytes for samples buffer\n",
buffer_size);
exit(1);
}
/* setup the data pointers in the AVFrame */
ret = avcodec_fill_audio_frame(frame, c->channels, c->sample_fmt,
(const uint8_t*)samples, buffer_size, 0);
if (ret < 0) {
fprintf(stderr, "Could not setup audio frame\n");
exit(1);
}
}
void myLibraryCallback(float *inbuffer, unsigned int length)
{
for(int j = 0; j < (2 * length); j++) {
if(frameEncoded >= (c->frame_size *2)) {
int avret, got_output;
av_init_packet(&pkt);
pkt.data = NULL; // packet data will be allocated by the encoder
pkt.size = 0;
avret = avcodec_encode_audio2(c, &pkt, frame, &got_output);
if (avret < 0) {
fprintf(stderr, "Error encoding audio frame\n");
exit(1);
}
if (got_output) {
fwrite(pkt.data, 1, pkt.size, file);
av_free_packet(&pkt);
}
frameEncoded = 0;
}
samples[frameEncoded] = inbuffer[j] * SHRT_MAX;
frameEncoded++;
}
}
The code is really simple, I initialize libavencode the usual way, then my audio library sends me processed PCM FLOAT [-1;1] interleaved at 44.1Khz and the number of floats (usually 1024) in the inbuffer for each channel (2 for stereo). So usually, inbuffer contains 2048 floats.
That was easy since I just needed here to convert my PCM to 16P, both interleaved. Moreover it is possible to code a 16P sample on a single char.
Now I would like to apply this to OGG which needs a sample format of AV_SAMPLE_FMT_FLTP.
Since my native format is AV_SAMPLE_FMT_FLT, it should only be some desinterleaving. Which is really easy to do.
The points I don't get are:
How can you send a float buffer on a char buffer ? Do we treat them as-is (float* floatSamples = (float*) samples) ? If so, what means the sample number avcodec gives you ? Is it the number of floats or chars ?
How can you send datas on two buffers (one for left, one for right) when avcodec_fill_audio_frame only takes a (uint8_t*) parameter and not a (uint8_t**) for multiple channels ? Does-it completely change the previous sample code ?
I tried to find some answers myself and I made a LOT of experiments so far but I failed on theses points. Since there is a huge lack of documentation on these, I would be very grateful if you had answers.
Thank you !

Resources