Why is this simple audio code not creating any sound? - audio

I am learning how to generate sound using Windows and c++. I was trying to use a simple tutorial, but the code only creates silence. What did I do wrong? I tested with some print commands (which I removed) and the code seems to run to completion. Other sounds (from other programs, of course) play fine, and there are no error messages. I think the code looks right, but I am clearly not capable of judging that...
#include <iostream>
#include <cmath>
#include <windows.h>
#include <mmsystem.h>
// Sample rate
const int SAMPLE_RATE = 44100;
// Frequency of the sine wave
const int FREQUENCY = 440;
int main()
{
// Open the default audio device for output
HWAVEOUT hWaveOut = NULL;
waveOutOpen(&hWaveOut, WAVE_MAPPER, NULL, 0, 0, CALLBACK_NULL);
// Create a WAVEHDR structure to describe the waveform data
WAVEHDR waveHdr;
memset(&waveHdr, 0, sizeof(waveHdr));
// Allocate memory for the waveform data
const int BUFFER_SIZE = SAMPLE_RATE * sizeof(short);
short* samples = (short*)malloc(BUFFER_SIZE);
// Generate the sine wave samples
for (int i = 0; i < SAMPLE_RATE; ++i)
{
float t = i / float(SAMPLE_RATE);
float sample = sin(2 * M_PI * FREQUENCY * t);
samples[i] = short(sample * 32767);
}
// Set up the WAVEHDR structure
waveHdr.lpData = (LPSTR)samples;
waveHdr.dwBufferLength = BUFFER_SIZE;
// Prepare the waveform data for playback
waveOutPrepareHeader(hWaveOut, &waveHdr, sizeof(waveHdr));
// Play the waveform data
waveOutWrite(hWaveOut, &waveHdr, sizeof(waveHdr));
// Wait until the sound finishes playing
while (waveHdr.dwFlags & WHDR_DONE != WHDR_DONE)
{
// Do nothing
}
// Clean up
waveOutUnprepareHeader(hWaveOut, &waveHdr, sizeof(waveHdr));
waveOutClose(hWaveOut);
free(samples);
return 0;
}

You need to set a WAVEFORMATEX structure before opening and playing audio (more information can be found here https://learn.microsoft.com/en-us/windows/win32/api/mmeapi/ns-mmeapi-waveformatex ).
WAVEFORMATEX wfex;
wfex.wFormatTag = WAVE_FORMAT_PCM;
wfex.nChannels = 1;
wfex.wBitsPerSample = 16; //number of bits per sample
wfex.nAvgBytesPerSec = 1 * 44100 * 2;//numChannels * numSamplesPerSec * numBytesPerSample
wfex.nBlockAlign = 1 * 2;//numChannels * numBytesPerSample;
wfex.cbSize = 0;
HWAVEOUT hWaveOut = NULL;
MMRESULT mm = waveOutOpen(&hWaveOut, WAVE_MAPPER, &wfex, 0, 0, CALLBACK_NULL);
if (mm != MMSYSERR_NOERROR){
std::string errorMsg = getWaveError(mm);
//display the message etc
}
In case of an error, you can query the mm variable and display an appropriate message that could help you identify the problem.
std::string getWaveError(MMRESULT mm){
std::string errorMsg;
switch (mm) {
case MMSYSERR_INVALHANDLE:
errorMsg = "Wave exception: Invalid handle";
break;
case MMSYSERR_NOMEM:
errorMsg = "Wave exception: Not enough memory";
break;
case MMSYSERR_NODRIVER:
errorMsg = "Wave exception: No driver present";
break;
case WAVERR_UNPREPARED:
errorMsg = "Wave exception: Unprepared buffer";
break;
case MMSYSERR_HANDLEBUSY:
errorMsg = "Wave exception: Handle busy";
break;
case MMSYSERR_ALLOCATED:
errorMsg = "Wave exception: Device already allocated";
break;
case WAVERR_BADFORMAT:
errorMsg = "Wave exception: Unsupported wave format";
break;
default:
errorMsg = "Wave exception: Unknown error: Code " + mm;
;
}
return errorMsg;
}
Hope this helps.

Related

C++ FFmpeg distorted sound when converting audio

I'm using the FFmpeg library to generate MP4 files containing audio from various files, such as MP3, WAV, OGG, but I'm having some troubles (I'm also putting video in there, but for simplicity's sake I'm omitting that for this question, since I've got that working). My current code opens an audio file, decodes the content and converts it into the MP4 container and finally writes it into the destination file as interleaved frames.
It works perfectly for most MP3 files, but when inputting WAV or OGG, the audio in the resulting MP4 is slightly distorted and often plays at the wrong speed (up to many times faster or slower).
I've looked at countless of examples of using the converting functions (swr_convert), but I can't seem to get rid of the noise in the exported audio.
Here's how I add an audio stream to the MP4 (outContext is the AVFormatContext for the output file):
audioCodec = avcodec_find_encoder(outContext->oformat->audio_codec);
if (!audioCodec)
die("Could not find audio encoder!");
// Start stream
audioStream = avformat_new_stream(outContext, audioCodec);
if (!audioStream)
die("Could not allocate audio stream!");
audioCodecContext = audioStream->codec;
audioStream->id = 1;
// Setup
audioCodecContext->sample_fmt = AV_SAMPLE_FMT_S16;
audioCodecContext->bit_rate = 128000;
audioCodecContext->sample_rate = 44100;
audioCodecContext->channels = 2;
audioCodecContext->channel_layout = AV_CH_LAYOUT_STEREO;
// Open the codec
if (avcodec_open2(audioCodecContext, audioCodec, NULL) < 0)
die("Could not open audio codec");
And to open a sound file from MP3/WAV/OGG (from the filename variable)...
// Create contex
formatContext = avformat_alloc_context();
if (avformat_open_input(&formatContext, filename, NULL, NULL)<0)
die("Could not open file");
// Find info
if (avformat_find_stream_info(formatContext, 0)<0)
die("Could not find file info");
av_dump_format(formatContext, 0, filename, false);
// Find audio stream
streamId = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
if (streamId < 0)
die("Could not find Audio Stream");
codecContext = formatContext->streams[streamId]->codec;
// Find decoder
codec = avcodec_find_decoder(codecContext->codec_id);
if (codec == NULL)
die("cannot find codec!");
// Open codec
if (avcodec_open2(codecContext, codec, 0)<0)
die("Codec cannot be found");
// Set up resample context
swrContext = swr_alloc();
if (!swrContext)
die("Failed to alloc swr context");
av_opt_set_int(swrContext, "in_channel_count", codecContext->channels, 0);
av_opt_set_int(swrContext, "in_channel_layout", codecContext->channel_layout, 0);
av_opt_set_int(swrContext, "in_sample_rate", codecContext->sample_rate, 0);
av_opt_set_sample_fmt(swrContext, "in_sample_fmt", codecContext->sample_fmt, 0);
av_opt_set_int(swrContext, "out_channel_count", audioCodecContext->channels, 0);
av_opt_set_int(swrContext, "out_channel_layout", audioCodecContext->channel_layout, 0);
av_opt_set_int(swrContext, "out_sample_rate", audioCodecContext->sample_rate, 0);
av_opt_set_sample_fmt(swrContext, "out_sample_fmt", audioCodecContext->sample_fmt, 0);
if (swr_init(swrContext))
die("Failed to init swr context");
Finally, to decode+convert+encode...
// Allocate and init re-usable frames
audioFrameDecoded = av_frame_alloc();
if (!audioFrameDecoded)
die("Could not allocate audio frame");
audioFrameDecoded->format = fileCodecContext->sample_fmt;
audioFrameDecoded->channel_layout = fileCodecContext->channel_layout;
audioFrameDecoded->channels = fileCodecContext->channels;
audioFrameDecoded->sample_rate = fileCodecContext->sample_rate;
audioFrameConverted = av_frame_alloc();
if (!audioFrameConverted)
die("Could not allocate audio frame");
audioFrameConverted->nb_samples = audioCodecContext->frame_size;
audioFrameConverted->format = audioCodecContext->sample_fmt;
audioFrameConverted->channel_layout = audioCodecContext->channel_layout;
audioFrameConverted->channels = audioCodecContext->channels;
audioFrameConverted->sample_rate = audioCodecContext->sample_rate;
AVPacket inPacket;
av_init_packet(&inPacket);
inPacket.data = NULL;
inPacket.size = 0;
int frameFinished = 0;
while (av_read_frame(formatContext, &inPacket) >= 0) {
if (inPacket.stream_index == streamId) {
int len = avcodec_decode_audio4(fileCodecContext, audioFrameDecoded, &frameFinished, &inPacket);
if (frameFinished) {
// Convert
uint8_t *convertedData=NULL;
if (av_samples_alloc(&convertedData,
NULL,
audioCodecContext->channels,
audioFrameConverted->nb_samples,
audioCodecContext->sample_fmt, 0) < 0)
die("Could not allocate samples");
int outSamples = swr_convert(swrContext,
&convertedData,
audioFrameConverted->nb_samples,
(const uint8_t **)audioFrameDecoded->data,
audioFrameDecoded->nb_samples);
if (outSamples < 0)
die("Could not convert");
size_t buffer_size = av_samples_get_buffer_size(NULL,
audioCodecContext->channels,
audioFrameConverted->nb_samples,
audioCodecContext->sample_fmt,
0);
if (buffer_size < 0)
die("Invalid buffer size");
if (avcodec_fill_audio_frame(audioFrameConverted,
audioCodecContext->channels,
audioCodecContext->sample_fmt,
convertedData,
buffer_size,
0) < 0)
die("Could not fill frame");
AVPacket outPacket;
av_init_packet(&outPacket);
outPacket.data = NULL;
outPacket.size = 0;
if (avcodec_encode_audio2(audioCodecContext, &outPacket, audioFrameConverted, &frameFinished) < 0)
die("Error encoding audio frame");
if (frameFinished) {
outPacket.stream_index = audioStream->index;
if (av_interleaved_write_frame(outContext, &outPacket) != 0)
die("Error while writing audio frame");
av_free_packet(&outPacket);
}
}
}
}
av_frame_free(&audioFrameConverted);
av_frame_free(&audioFrameDecoded);
av_free_packet(&inPacket);
I have also tried setting appropriate pts values for outgoing frames, but that doesn't seem to affect the sound quality at all.
I'm also unsure how/if I should be allocating the converted data, can av_samples_alloc be used for this? What about avcodec_fill_audio_frame? Am I on the right track?
Any input is appreciated (I can also send the exported MP4s if necessary, if you want to hear the distortion).
if (avcodec_encode_audio2(audioCodecContext, &outPacket, audioFrameConverted, &frameFinished) < 0)
die("Error encoding audio frame");
You seem to be assuming that the encoder will eat all submitted samples - it doesn't. It also doesn't cache them internally. It will eat a specific number of samples (AVCodecContext.frame_size), and the rest should be resubmitted in the next call to avcodec_encode_audio2().
[edit]
ok, so your edited code is better, but not there yet. You're still assuming the decoder will output at least frame_size samples for each call to avcodec_decode_audioN() (after resampling), which may not be the case. If that happens (and it does, for ogg), your avcodec_encode_audioN() call will encode an incomplete input buffer (because you say it's got frame_size samples, but it doesn't). Likewise, your code also doesn't deal with cases where the decoder outputs a number significantly bigger than frame_size (like 10*frame_size) expected by the encoder, in which case you'll get overruns - basically your 1:1 decode/encode mapping is the main source of your problem.
As a solution, consider the swrContext a FIFO, where you input all decoder samples, and loop over it until it's got less than frame_size samples left. I'll leave it up to you to learn how to deal with end-of-stream, because you'll need to flush cached samples out of the decoder (by calling avcodec_decode_audioN() with AVPacket where .data = NULL and .size = 0), flush the swrContext (by calling swr_context() until it returns 0) as well as flush the encoder (by feeding it NULL AVFrames until it returns AVPacket with .size = 0). Right now you'll probably get an output file where the end is slightly truncated. That shouldn't be hard to figure out.
This code works for me for m4a/ogg/mp3 to m4a/aac conversion:
#include "libswresample/swresample.h"
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libavutil/opt.h"
#include <stdio.h>
#include <stdlib.h>
static void die(char *str) {
fprintf(stderr, "%s\n", str);
exit(1);
}
static AVStream *add_audio_stream(AVFormatContext *oc, enum AVCodecID codec_id)
{
AVCodecContext *c;
AVCodec *encoder = avcodec_find_encoder(codec_id);
AVStream *st = avformat_new_stream(oc, encoder);
if (!st) die("av_new_stream");
c = st->codec;
c->codec_id = codec_id;
c->codec_type = AVMEDIA_TYPE_AUDIO;
/* put sample parameters */
c->bit_rate = 64000;
c->sample_rate = 44100;
c->channels = 2;
c->sample_fmt = encoder->sample_fmts[0];
c->channel_layout = AV_CH_LAYOUT_STEREO;
// some formats want stream headers to be separate
if(oc->oformat->flags & AVFMT_GLOBALHEADER)
c->flags |= CODEC_FLAG_GLOBAL_HEADER;
return st;
}
static void open_audio(AVFormatContext *oc, AVStream *st)
{
AVCodecContext *c = st->codec;
AVCodec *codec;
/* find the audio encoder */
codec = avcodec_find_encoder(c->codec_id);
if (!codec) die("avcodec_find_encoder");
/* open it */
AVDictionary *dict = NULL;
av_dict_set(&dict, "strict", "+experimental", 0);
int res = avcodec_open2(c, codec, &dict);
if (res < 0) die("avcodec_open");
}
int main(int argc, char *argv[]) {
av_register_all();
if (argc != 3) {
fprintf(stderr, "%s <in> <out>\n", argv[0]);
exit(1);
}
// Allocate and init re-usable frames
AVCodecContext *fileCodecContext, *audioCodecContext;
AVFormatContext *formatContext, *outContext;
AVStream *audioStream;
SwrContext *swrContext;
int streamId;
// input file
const char *file = argv[1];
int res = avformat_open_input(&formatContext, file, NULL, NULL);
if (res != 0) die("avformat_open_input");
res = avformat_find_stream_info(formatContext, NULL);
if (res < 0) die("avformat_find_stream_info");
AVCodec *codec;
res = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);
if (res < 0) die("av_find_best_stream");
streamId = res;
fileCodecContext = avcodec_alloc_context3(codec);
avcodec_copy_context(fileCodecContext, formatContext->streams[streamId]->codec);
res = avcodec_open2(fileCodecContext, codec, NULL);
if (res < 0) die("avcodec_open2");
// output file
const char *outfile = argv[2];
AVOutputFormat *fmt = fmt = av_guess_format(NULL, outfile, NULL);
if (!fmt) die("av_guess_format");
outContext = avformat_alloc_context();
outContext->oformat = fmt;
audioStream = add_audio_stream(outContext, fmt->audio_codec);
open_audio(outContext, audioStream);
res = avio_open2(&outContext->pb, outfile, AVIO_FLAG_WRITE, NULL, NULL);
if (res < 0) die("url_fopen");
avformat_write_header(outContext, NULL);
audioCodecContext = audioStream->codec;
// resampling
swrContext = swr_alloc();
av_opt_set_channel_layout(swrContext, "in_channel_layout", fileCodecContext->channel_layout, 0);
av_opt_set_channel_layout(swrContext, "out_channel_layout", audioCodecContext->channel_layout, 0);
av_opt_set_int(swrContext, "in_sample_rate", fileCodecContext->sample_rate, 0);
av_opt_set_int(swrContext, "out_sample_rate", audioCodecContext->sample_rate, 0);
av_opt_set_sample_fmt(swrContext, "in_sample_fmt", fileCodecContext->sample_fmt, 0);
av_opt_set_sample_fmt(swrContext, "out_sample_fmt", audioCodecContext->sample_fmt, 0);
res = swr_init(swrContext);
if (res < 0) die("swr_init");
AVFrame *audioFrameDecoded = av_frame_alloc();
if (!audioFrameDecoded)
die("Could not allocate audio frame");
audioFrameDecoded->format = fileCodecContext->sample_fmt;
audioFrameDecoded->channel_layout = fileCodecContext->channel_layout;
audioFrameDecoded->channels = fileCodecContext->channels;
audioFrameDecoded->sample_rate = fileCodecContext->sample_rate;
AVFrame *audioFrameConverted = av_frame_alloc();
if (!audioFrameConverted) die("Could not allocate audio frame");
audioFrameConverted->nb_samples = audioCodecContext->frame_size;
audioFrameConverted->format = audioCodecContext->sample_fmt;
audioFrameConverted->channel_layout = audioCodecContext->channel_layout;
audioFrameConverted->channels = audioCodecContext->channels;
audioFrameConverted->sample_rate = audioCodecContext->sample_rate;
AVPacket inPacket;
av_init_packet(&inPacket);
inPacket.data = NULL;
inPacket.size = 0;
int frameFinished = 0;
while (av_read_frame(formatContext, &inPacket) >= 0) {
if (inPacket.stream_index == streamId) {
int len = avcodec_decode_audio4(fileCodecContext, audioFrameDecoded, &frameFinished, &inPacket);
if (frameFinished) {
// Convert
uint8_t *convertedData=NULL;
if (av_samples_alloc(&convertedData,
NULL,
audioCodecContext->channels,
audioFrameConverted->nb_samples,
audioCodecContext->sample_fmt, 0) < 0)
die("Could not allocate samples");
int outSamples = swr_convert(swrContext, NULL, 0,
//&convertedData,
//audioFrameConverted->nb_samples,
(const uint8_t **)audioFrameDecoded->data,
audioFrameDecoded->nb_samples);
if (outSamples < 0) die("Could not convert");
for (;;) {
outSamples = swr_get_out_samples(swrContext, 0);
if (outSamples < audioCodecContext->frame_size * audioCodecContext->channels) break; // see comments, thanks to #dajuric for fixing this
outSamples = swr_convert(swrContext,
&convertedData,
audioFrameConverted->nb_samples, NULL, 0);
size_t buffer_size = av_samples_get_buffer_size(NULL,
audioCodecContext->channels,
audioFrameConverted->nb_samples,
audioCodecContext->sample_fmt,
0);
if (buffer_size < 0) die("Invalid buffer size");
if (avcodec_fill_audio_frame(audioFrameConverted,
audioCodecContext->channels,
audioCodecContext->sample_fmt,
convertedData,
buffer_size,
0) < 0)
die("Could not fill frame");
AVPacket outPacket;
av_init_packet(&outPacket);
outPacket.data = NULL;
outPacket.size = 0;
if (avcodec_encode_audio2(audioCodecContext, &outPacket, audioFrameConverted, &frameFinished) < 0)
die("Error encoding audio frame");
if (frameFinished) {
outPacket.stream_index = audioStream->index;
if (av_interleaved_write_frame(outContext, &outPacket) != 0)
die("Error while writing audio frame");
av_free_packet(&outPacket);
}
}
}
}
}
swr_close(swrContext);
swr_free(&swrContext);
av_frame_free(&audioFrameConverted);
av_frame_free(&audioFrameDecoded);
av_free_packet(&inPacket);
av_write_trailer(outContext);
avio_close(outContext->pb);
avcodec_close(fileCodecContext);
avcodec_free_context(&fileCodecContext);
avformat_close_input(&formatContext);
return 0;
}
I wanted to include a couple things I found when I was working with the above code.
I had one file get stuck in an infinite loop. The reason is the file had a sample rate of 48000 and the code changes it to a 44100. This caused it to always have extra outSamples. swr_convert & would not grab them. So I ended up changing add_audio_stream to match the input streams sample rate.
c->sample_rate = fileCodecContext->sample_rate;
Also I had to produce wav files as my output. And it had a framesize of 0. so I just chose a number after a few tests I went with 32. I noticed if I went too big (ex 128) I would get audio glitches.
if (audioFrameConverted->nb_samples <= 0) audioFrameConverted->nb_samples = 32; //wav files have a 0
Changed the if statement that breaks out of the loop to check nb_samples if frame_size is 0.
if ((outSamples < audioCodecContext->frame_size * audioCodecContext->channels) || audioCodecContext->frame_size==0 && (outSamples < audioFrameConverted->nb_samples * audioCodecContext->channels)) break; // see comments, thanks to #dajuric for fixing this
There was also a glitch when I was testing outputting to ogg files where the timestamp data was missing so the file wouldn't play correctly in vlc. There were a few lines I added that helped with that.
out_audioStream->time_base = in_audioStream->time_base; // entered before avio_open.
outPacket.dts = audioFrameDecoded->pkt_dts;//rest after avcodec_encode_audio2
outPacket.pts = audioFrameDecoded->pkt_pts;
av_packet_rescale_ts(&outPacket, in_audioStream->time_base, out_audioStream->time_base);
Variables might be a little different I converted the code to c#. Thought this might help someone.
Actually swr_convert won't work for that, try to use swr_convert_frame instead.

Does OpenAL-Soft have an upper limit on the number of sources?

I'm using OpenAL-Soft for a project, and right now I'm trying to decide whether I need to implement OpenAL source pooling.
Source pooling is somewhat cumbersome (I need to write code to "allocate" sources, as well as somehow decide when they can be "freed"), but necessary if the number of sources that can be generated by OpenAL is limited.
Since OpenAL-Soft is a software implementation of the OpenAL API, I wonder if the number of sources it can generate is actually limited by the underlying hardware. Theoretically, since all mixing is done in software, there might be no need to actually use one hardware channel per source.
However, I'm not sure about it. How should I proceed?
It appears that OpenAL-Soft indeed does have an upper limit on the number of sources, which can be defined in a config file. The default seems to be 256. It makes sense to limit the number of sources because of the associated CPU and memory costs. Looks like I'll end up implementing a source pool after all.
I just took a peek at its header ... did not see anything pop out.
Here is working code which synthesizes then renders audio buffer data ... you could play with seeing if it accommodates your necessary number of sources
// gcc -o openal_play_wed openal_play_wed.c -lopenal -lm
#include <stdio.h>
#include <stdlib.h> // gives malloc
#include <math.h>
#ifdef __APPLE__
#include <OpenAL/al.h>
#include <OpenAL/alc.h>
#elif __linux
#include <AL/al.h>
#include <AL/alc.h>
#endif
ALCdevice * openal_output_device;
ALCcontext * openal_output_context;
ALuint internal_buffer;
ALuint streaming_source[1];
int al_check_error(const char * given_label) {
ALenum al_error;
al_error = alGetError();
if(AL_NO_ERROR != al_error) {
printf("ERROR - %s (%s)\n", alGetString(al_error), given_label);
return al_error;
}
return 0;
}
void MM_init_al() {
const char * defname = alcGetString(NULL, ALC_DEFAULT_DEVICE_SPECIFIER);
openal_output_device = alcOpenDevice(defname);
openal_output_context = alcCreateContext(openal_output_device, NULL);
alcMakeContextCurrent(openal_output_context);
// setup buffer and source
alGenBuffers(1, & internal_buffer);
al_check_error("failed call to alGenBuffers");
}
void MM_exit_al() {
ALenum errorCode = 0;
// Stop the sources
alSourceStopv(1, & streaming_source[0]); // streaming_source
int ii;
for (ii = 0; ii < 1; ++ii) {
alSourcei(streaming_source[ii], AL_BUFFER, 0);
}
// Clean-up
alDeleteSources(1, &streaming_source[0]);
alDeleteBuffers(16, &streaming_source[0]);
errorCode = alGetError();
alcMakeContextCurrent(NULL);
errorCode = alGetError();
alcDestroyContext(openal_output_context);
alcCloseDevice(openal_output_device);
}
void MM_render_one_buffer() {
/* Fill buffer with Sine-Wave */
// float freq = 440.f;
float freq = 100.f;
float incr_freq = 0.1f;
int seconds = 4;
// unsigned sample_rate = 22050;
unsigned sample_rate = 44100;
double my_pi = 3.14159;
size_t buf_size = seconds * sample_rate;
short * samples = malloc(sizeof(short) * buf_size);
printf("\nhere is freq %f\n", freq);
int i=0;
for(; i<buf_size; ++i) {
samples[i] = 32760 * sin( (2.f * my_pi * freq)/sample_rate * i );
freq += incr_freq;
// incr_freq += incr_freq;
// freq *= factor_freq;
if (100.0 > freq || freq > 5000.0) {
incr_freq *= -1.0f;
}
}
/* upload buffer to OpenAL */
alBufferData( internal_buffer, AL_FORMAT_MONO16, samples, buf_size, sample_rate);
al_check_error("populating alBufferData");
free(samples);
/* Set-up sound source and play buffer */
// ALuint src = 0;
// alGenSources(1, &src);
// alSourcei(src, AL_BUFFER, internal_buffer);
alGenSources(1, & streaming_source[0]);
alSourcei(streaming_source[0], AL_BUFFER, internal_buffer);
// alSourcePlay(src);
alSourcePlay(streaming_source[0]);
// ---------------------
ALenum current_playing_state;
alGetSourcei(streaming_source[0], AL_SOURCE_STATE, & current_playing_state);
al_check_error("alGetSourcei AL_SOURCE_STATE");
while (AL_PLAYING == current_playing_state) {
printf("still playing ... so sleep\n");
sleep(1); // should use a thread sleep NOT sleep() for a more responsive finish
alGetSourcei(streaming_source[0], AL_SOURCE_STATE, & current_playing_state);
al_check_error("alGetSourcei AL_SOURCE_STATE");
}
printf("end of playing\n");
/* Dealloc OpenAL */
MM_exit_al();
} // MM_render_one_buffer
int main() {
MM_init_al();
MM_render_one_buffer();
}

Encoding FLOAT PCM to OGG using libav

I am currently trying to convert a raw PCM Float buffer to an OGG encoded file. I tried several library to do the encoding process and I finally chose libavcodec.
What I precisely want to do is get the float buffer ([-1;1]) provided by my audio library and turn it to a char buffer of encoded ogg data.
I managed to encode the float buffer to a buffer of encoded MP2 with this (proof of concept) code:
static AVCodec *codec;
static AVCodecContext *c;
static AVPacket pkt;
static uint16_t* samples;
static AVFrame* frame;
static int frameEncoded;
FILE *file;
int main(int argc, char *argv[])
{
file = fopen("file.ogg", "w+");
long ret;
avcodec_register_all();
codec = avcodec_find_encoder(AV_CODEC_ID_MP2);
if (!codec) {
fprintf(stderr, "codec not found\n");
exit(1);
}
c = avcodec_alloc_context3(NULL);
c->bit_rate = 256000;
c->sample_rate = 44100;
c->channels = 2;
c->sample_fmt = AV_SAMPLE_FMT_S16;
c->channel_layout = AV_CH_LAYOUT_STEREO;
/* open it */
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
exit(1);
}
/* frame containing input raw audio */
frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Could not allocate audio frame\n");
exit(1);
}
frame->nb_samples = c->frame_size;
frame->format = c->sample_fmt;
frame->channel_layout = c->channel_layout;
/* the codec gives us the frame size, in samples,
* we calculate the size of the samples buffer in bytes */
int buffer_size = av_samples_get_buffer_size(NULL, c->channels, c->frame_size,
c->sample_fmt, 0);
if (buffer_size < 0) {
fprintf(stderr, "Could not get sample buffer size\n");
exit(1);
}
samples = av_malloc(buffer_size);
if (!samples) {
fprintf(stderr, "Could not allocate %d bytes for samples buffer\n",
buffer_size);
exit(1);
}
/* setup the data pointers in the AVFrame */
ret = avcodec_fill_audio_frame(frame, c->channels, c->sample_fmt,
(const uint8_t*)samples, buffer_size, 0);
if (ret < 0) {
fprintf(stderr, "Could not setup audio frame\n");
exit(1);
}
}
void myLibraryCallback(float *inbuffer, unsigned int length)
{
for(int j = 0; j < (2 * length); j++) {
if(frameEncoded >= (c->frame_size *2)) {
int avret, got_output;
av_init_packet(&pkt);
pkt.data = NULL; // packet data will be allocated by the encoder
pkt.size = 0;
avret = avcodec_encode_audio2(c, &pkt, frame, &got_output);
if (avret < 0) {
fprintf(stderr, "Error encoding audio frame\n");
exit(1);
}
if (got_output) {
fwrite(pkt.data, 1, pkt.size, file);
av_free_packet(&pkt);
}
frameEncoded = 0;
}
samples[frameEncoded] = inbuffer[j] * SHRT_MAX;
frameEncoded++;
}
}
The code is really simple, I initialize libavencode the usual way, then my audio library sends me processed PCM FLOAT [-1;1] interleaved at 44.1Khz and the number of floats (usually 1024) in the inbuffer for each channel (2 for stereo). So usually, inbuffer contains 2048 floats.
That was easy since I just needed here to convert my PCM to 16P, both interleaved. Moreover it is possible to code a 16P sample on a single char.
Now I would like to apply this to OGG which needs a sample format of AV_SAMPLE_FMT_FLTP.
Since my native format is AV_SAMPLE_FMT_FLT, it should only be some desinterleaving. Which is really easy to do.
The points I don't get are:
How can you send a float buffer on a char buffer ? Do we treat them as-is (float* floatSamples = (float*) samples) ? If so, what means the sample number avcodec gives you ? Is it the number of floats or chars ?
How can you send datas on two buffers (one for left, one for right) when avcodec_fill_audio_frame only takes a (uint8_t*) parameter and not a (uint8_t**) for multiple channels ? Does-it completely change the previous sample code ?
I tried to find some answers myself and I made a LOT of experiments so far but I failed on theses points. Since there is a huge lack of documentation on these, I would be very grateful if you had answers.
Thank you !

Problems with SDL Audio (No output)

I'm facing some problems with understanding how the SDL audio callback works.
I have this simple code, which should generate a simple square wave:
#include "SDL.h"
#include "SDL_audio.h"
#include <stdlib.h>
#include <math.h>
SDL_Surface *screen;
SDL_AudioSpec spec;
Uint32 sound_len=512;
Uint8 *sound_buffer;
int sound_pos = 0;
int counter;
unsigned int phase_delta=600;
unsigned int phase;
unsigned char out;
//Initialization
void init_sdl (void)
{
if (SDL_Init (SDL_INIT_VIDEO|SDL_INIT_AUDIO) < 0)
exit (-1);
atexit (SDL_Quit);
screen = SDL_SetVideoMode (640, 480, 16, SDL_HWSURFACE);
if (screen == NULL)
exit (-1);
}
//Generates a new sample and outputs it to the audio card
void Callback (void *userdata, Uint8 *stream, int len)
{
Uint8 *waveptr;
//Generates a new sample
phase+=phase_delta;
if ((phase>>8)<127) out=255; else out=0;
//End
//Output the current sample to the audio card
waveptr = sound_buffer;
SDL_MixAudio(stream, waveptr, 1, SDL_MIX_MAXVOLUME);
}
void play (void)
{
sound_buffer = new Uint8[512];
sound_len= 512;
spec.freq = 22050;
spec.format = AUDIO_S16SYS;
spec.channels = 1;
spec.silence = 0;
spec.samples = 512;
spec.padding = 0;
spec.size = 0;
spec.userdata = 0;
spec.callback = Callback;
if (SDL_OpenAudio (&spec, NULL) < 0)
{ //Throw an error
printf ("I don't think you like this: %s\n", SDL_GetError ());
exit (-1);
}
SDL_PauseAudio (0);//Start the audio
}
int main(int argc, char* argv[])
{
init_sdl ();
play ();
SDL_Delay (250);
return 0;
}
I know that the callback is not done right, because I have no idea how to output to the buffer. Each time the callback is called, the first part of the callback function code generates the new sample, and stores it in the variabile Out.
Can anyone here modify this code so that the new samples go from Out to the correct position in the audio buffer?
Also, I don't want to have the code modified in a very super-complex way just to generate the square wave - I have already taken care of that. The wave is generated correctly, each new sample appearing in the variable Out. I just need these samples to be routed correctly to the audio buffer.
You need to cast stream to a actual.format-appropriate datatype and then overwrite the values in stream with len / sizeof( <format's datatype> ) samples.
The square-wave will be kinda hard to hear because the given algorithm will only generate a brief high pulse every ~7.1 million samples (~5 minutes #22050Hz) when phase wraps around.
Try something like this:
#include <SDL.h>
#include <SDL_audio.h>
#include <iostream>
using namespace std;
//Generates new samples and outputs them to the audio card
void Callback( void* userdata, Uint8* stream, int len )
{
// the format of stream depends on actual.format in main()
// we're assuming it's AUDIO_S16SYS
short* samples = reinterpret_cast< short* >( stream );
size_t numSamples = len / sizeof( short );
const unsigned int phase_delta = 600;
static unsigned int phase = 0;
// loop over all our samples
for( size_t i = 0; i < numSamples; ++i )
{
phase+=phase_delta;
short out = 0;
if ((phase>>8)<127) out=SHRT_MAX; else out=0;
samples[i] = out;
}
}
int main( int argc, char* argv[] )
{
if( SDL_Init( SDL_INIT_VIDEO | SDL_INIT_AUDIO ) < 0 )
return -1;
atexit( SDL_Quit );
SDL_Surface* screen = SDL_SetVideoMode( 640, 480, 16, SDL_ANYFORMAT );
if( screen == NULL)
return -1;
SDL_AudioSpec spec;
spec.freq = 22050;
spec.format = AUDIO_S16SYS;
spec.channels = 1;
spec.samples = 4096;
spec.callback = Callback;
spec.userdata = NULL;
SDL_AudioSpec actual;
if( SDL_OpenAudio( &spec, &actual ) < 0 )
{
cerr << "I don't think you like this: " << SDL_GetError() << endl;
return -1;
}
if( spec.format != actual.format )
{
cerr << "format mismatch!" << endl;
return -1;
}
SDL_PauseAudio( 0 );
SDL_Event ev;
while( SDL_WaitEvent( &ev ) )
{
if( ev.type == SDL_QUIT )
break;
}
SDL_CloseAudio();
SDL_Quit();
return 0;
}

How do I create an AIFF file

I have some raw sound data that I want to make into an AIFF file format. I know the specifics of the audio data. I tried creating a wave from the audio, but that didn't work. OS X does have a function to create the header, but it directly addresses a file and I might not want to do that (that and the function, SetupAIFFHeader is deprecated and unavailable in 64-bit code).
Apple's Core Audio API will create and write data to an AIFF file, and other formats. It works pretty well, but in my opinion the API is difficult to use. I'll paste some example code below, but you'd probably want to change it. AudioFileWriteBytes can write more than 2 bytes at a time. There is another wrapper API in AudioToolbox/ExtendedAudioFile.h which will let you write a format like 32 bit floats, and have it translated to an underlying format, be it AIFF/PCM or a compressed format.
double sampleRate = 44100;
double duration = ...;
long nSamples = (long)(sampleRate * duration);
// Format struct for 1 channel, 16 bit PCM audio
AudioStreamBasicDescription asbd;
memset(&asbd, 0, sizeof(asbd));
asbd.mSampleRate = sampleRate;
asbd.mFormatID = kAudioFormatLinearPCM;
asbd.mFormatFlags = kAudioFormatFlagIsBigEndian | kAudioFormatFlagIsSignedInteger;
asbd.mBitsPerChannel = 16;
asbd.mChannelsPerFrame = 1;
asbd.mFramesPerPacket = 1;
asbd.mBytesPerFrame = 2;
asbd.mBytesPerPacket = 2;
CFURLRef url = makeUrl("hello.aiff");
AudioFileID audioFile;
OSStatus res;
res = AudioFileCreateWithURL(url, kAudioFileAIFFType, &asbd,
kAudioFileFlags_EraseFile, &audioFile);
checkError(res);
UInt32 numBytes = 2;
for (int i=0; i<nSamples; i++) {
SInt16 sample = ... // something between SHRT_MIN and SHRT_MAX;
sample = OSSwapHostToBigInt16(sample);
res = AudioFileWriteBytes(audioFile, false, i*2, &numBytes, &sample);
checkError(res);
}
res = AudioFileClose(audioFile);
checkError(res);
checkError is asserting that res == noErr. makeUrl looks like:
CFURLRef makeUrl(const char *cstr) {
CFStringRef path = CFStringCreateWithCString(0, cstr, kCFStringEncodingUTF8);
CFURLRef url = CFURLCreateWithFileSystemPath(NULL, path, 0, false);
CFRelease(path);
return url;
}
As much as I hate wheel-reinvention, I suspect your best bet might be to roll your own AIFF save routines.
AIFF is an extension of the old Electronic Arts EA-IFF format which was used on the Amiga; it's a series of 4-byte identifiers (similar to FOURCCs), block lengths and data payloads. The Wikipedia article is quite informative and provides links to other sites which contain detailed information about the format.
http://en.wikipedia.org/wiki/Audio_Interchange_File_Format
I was able to write a proper AIFF file. The last bit that was getting me was I was using a sizeof() for a structure's size, where the size omits the first eight bytes. I did use Apple's deprecated AIFF.h header to get the structures, and it seems that neither QuickTime X nor 7 reads the metadata I set in it.
You can see my work at PlayerPRO's PlayerPRO 6 branch. It's in a file called PPApp_AppDelegate.m in the function -createAIFFDataFromSettings:data:
Here is some C code that will create an AIFF file using the Apple CoreAudio and AudioToolbox frameworks for macOS.
#include <string.h>
#include <math.h>
#include "CoreAudio/CoreAudio.h"
#include "CoreAudio/CoreAudioTypes.h"
#include "AudioToolbox/AudioToolbox.h"
#include "AudioToolbox/AudioFile.h"
CFURLRef MakeUrl(const char *cstr);
void CheckError(OSStatus res);
AudioStreamBasicDescription asbd;
AudioFileID audioFile;
OSStatus res;
void CheckError(OSStatus result) {
if (result == noErr) return;
switch(result) {
case kAudioFileUnspecifiedError:
printf("kAudioFileUnspecifiedError");
break;
case kAudioFileUnsupportedFileTypeError:
printf("kAudioFileUnsupportedFileTypeError");
break;
case kAudioFileUnsupportedDataFormatError:
printf("kAudioFileUnsupportedDataFormatError");
break;
case kAudioFileUnsupportedPropertyError:
printf("kAudioFileUnsupportedPropertyError");
break;
case kAudioFileBadPropertySizeError:
printf("kAudioFileBadPropertySizeError");
break;
case kAudioFilePermissionsError:
printf("kAudioFilePermissionsError");
break;
case kAudioFileNotOptimizedError:
printf("kAudioFileNotOptimizedError");
break;
case kAudioFileInvalidChunkError:
printf("kAudioFileInvalidChunkError");
break;
case kAudioFileDoesNotAllow64BitDataSizeError:
printf("kAudioFileDoesNotAllow64BitDataSizeError");
break;
case kAudioFileInvalidPacketOffsetError:
printf("kAudioFileInvalidPacketOffsetError");
break;
case kAudioFileInvalidFileError:
printf("kAudioFileInvalidFileError");
break;
case kAudioFileOperationNotSupportedError:
printf("kAudioFileOperationNotSupportedError");
break;
case kAudioFileNotOpenError:
printf("kAudioFileNotOpenError");
break;
case kAudioFileEndOfFileError:
printf("kAudioFileEndOfFileError");
break;
case kAudioFilePositionError:
printf("kAudioFilePositionError");
break;
case kAudioFileFileNotFoundError:
printf("kAudioFileFileNotFoundError");
break;
default:
printf("unknown error");
break;
}
exit(result);
}
CFURLRef MakeUrl(const char *cstr) {
CFStringRef path = CFStringCreateWithCString(0, cstr, kCFStringEncodingUTF8);
CFURLRef url = CFURLCreateWithFileSystemPath(NULL, path, 0, false);
CFRelease(path);
return url;
}
int main() {
double sampleRate = 44100.0;
double duration = 10.0;
long nSamples = (long)(sampleRate * duration);
memset(&asbd, 0, sizeof(asbd));
// Format struct for 1 channel, 16 bit PCM audio
asbd.mSampleRate = sampleRate;
asbd.mFormatID = kAudioFormatLinearPCM;
asbd.mFormatFlags = kAudioFormatFlagIsBigEndian | kAudioFormatFlagIsSignedInteger;
asbd.mBitsPerChannel = 16;
asbd.mChannelsPerFrame = 1;
asbd.mFramesPerPacket = 1;
asbd.mBytesPerFrame = 2;
asbd.mBytesPerPacket = 2;
CFURLRef url = MakeUrl("sinpos.aiff");
res = AudioFileCreateWithURL(url, kAudioFileAIFFType, &asbd,
kAudioFileFlags_EraseFile, &audioFile);
CheckError(res);
UInt32 numBytes = 2;
int freq = 44; // 100 for approx 440Hz, 2940 for 15Hz, 44 for 1000Hz
for (int i=0; i<nSamples; i++) {
int x = (i % freq);
double angle = 2.0*3.1459*x/freq;
double s = 1.0*32767*sin(angle);
SInt16 sample = (SInt16) s;
sample = OSSwapHostToBigInt16(sample);
res = AudioFileWriteBytes(audioFile, false, i*2, &numBytes, &sample);
CheckError(res);
}
res = AudioFileClose(audioFile);
CheckError(res);
exit(0);
}
The Makefile is as follows:
aiffcreate: aiffcreate.c
gcc -o $# $< -framework AudioToolbox -framework CoreFoundation -framework CoreAudio -lm
clean:
rm *.aiff aiffcreate || true
This can be run by simply issuing a ./aiffcreate command on the command line and a file will be created named sinpos.aiff which is a pure 1000Hz tone lasting 10 seconds.

Resources