How do I create an AIFF file - audio

I have some raw sound data that I want to turn into an AIFF file. I know the specifics of the audio data. I tried creating a WAVE from the audio, but that didn't work. OS X does have a function to create the header, but it writes directly to a file, which I might not want (besides, that function, SetupAIFFHeader, is deprecated and unavailable in 64-bit code).

Apple's Core Audio API will create and write data to an AIFF file, among other formats. It works pretty well, but in my opinion the API is difficult to use. I'll paste some example code below, but you'd probably want to adapt it. AudioFileWriteBytes can write more than 2 bytes at a time. There is another wrapper API in AudioToolbox/ExtendedAudioFile.h which will let you write a format like 32-bit floats and have it translated to an underlying format, be it AIFF/PCM or a compressed format (see the sketch after the code below).
double sampleRate = 44100;
double duration = ...;
long nSamples = (long)(sampleRate * duration);

// Format struct for 1 channel, 16 bit PCM audio
AudioStreamBasicDescription asbd;
memset(&asbd, 0, sizeof(asbd));
asbd.mSampleRate = sampleRate;
asbd.mFormatID = kAudioFormatLinearPCM;
asbd.mFormatFlags = kAudioFormatFlagIsBigEndian | kAudioFormatFlagIsSignedInteger;
asbd.mBitsPerChannel = 16;
asbd.mChannelsPerFrame = 1;
asbd.mFramesPerPacket = 1;
asbd.mBytesPerFrame = 2;
asbd.mBytesPerPacket = 2;

CFURLRef url = makeUrl("hello.aiff");
AudioFileID audioFile;
OSStatus res;
res = AudioFileCreateWithURL(url, kAudioFileAIFFType, &asbd,
                             kAudioFileFlags_EraseFile, &audioFile);
checkError(res);

UInt32 numBytes = 2;
for (int i = 0; i < nSamples; i++) {
    SInt16 sample = ... // something between SHRT_MIN and SHRT_MAX;
    sample = OSSwapHostToBigInt16(sample);
    res = AudioFileWriteBytes(audioFile, false, i * 2, &numBytes, &sample);
    checkError(res);
}

res = AudioFileClose(audioFile);
checkError(res);
checkError simply asserts that res == noErr; a minimal version might be:
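#include <assert.h>

void checkError(OSStatus res) {
    assert(res == noErr);
}
makeUrl looks like: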
CFURLRef makeUrl(const char *cstr) {
    CFStringRef path = CFStringCreateWithCString(0, cstr, kCFStringEncodingUTF8);
    CFURLRef url = CFURLCreateWithFileSystemPath(NULL, path, kCFURLPOSIXPathStyle, false);
    CFRelease(path);
    return url;
}
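For the ExtendedAudioFile wrapper mentioned above, here is a minimal hedged sketch (untested, error checking omitted): it hands the API native-endian 32-bit floats and lets Core Audio convert them to the 16-bit big-endian PCM that the AIFF stores.
#include <AudioToolbox/ExtendedAudioFile.h>

void writeFloatsAsAiff(CFURLRef url, const float *samples, UInt32 nFrames) {
    // On-disk format: 16-bit big-endian PCM, mono, 44.1 kHz (as above)
    AudioStreamBasicDescription fileFormat = {0};
    fileFormat.mSampleRate = 44100;
    fileFormat.mFormatID = kAudioFormatLinearPCM;
    fileFormat.mFormatFlags = kAudioFormatFlagIsBigEndian | kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked;
    fileFormat.mBitsPerChannel = 16;
    fileFormat.mChannelsPerFrame = 1;
    fileFormat.mFramesPerPacket = 1;
    fileFormat.mBytesPerFrame = 2;
    fileFormat.mBytesPerPacket = 2;

    ExtAudioFileRef extFile;
    ExtAudioFileCreateWithURL(url, kAudioFileAIFFType, &fileFormat, NULL,
                              kAudioFileFlags_EraseFile, &extFile);

    // Client format: what we hand in -- native-endian 32-bit floats
    AudioStreamBasicDescription clientFormat = fileFormat;
    clientFormat.mFormatFlags = kAudioFormatFlagsNativeFloatPacked;
    clientFormat.mBitsPerChannel = 32;
    clientFormat.mBytesPerFrame = 4;
    clientFormat.mBytesPerPacket = 4;
    ExtAudioFileSetProperty(extFile, kExtAudioFileProperty_ClientDataFormat,
                            sizeof(clientFormat), &clientFormat);

    AudioBufferList bufList;
    bufList.mNumberBuffers = 1;
    bufList.mBuffers[0].mNumberChannels = 1;
    bufList.mBuffers[0].mDataByteSize = nFrames * sizeof(float);
    bufList.mBuffers[0].mData = (void *)samples;
    ExtAudioFileWrite(extFile, nFrames, &bufList);
    ExtAudioFileDispose(extFile);
}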

As much as I hate wheel-reinvention, I suspect your best bet might be to roll your own AIFF save routines.
AIFF is an extension of the old Electronic Arts EA-IFF format which was used on the Amiga; it's a series of chunks, each consisting of a 4-byte identifier (similar to a FOURCC), a 4-byte big-endian length, and a data payload. The Wikipedia article is quite informative and provides links to other sites which contain detailed information about the format.
http://en.wikipedia.org/wiki/Audio_Interchange_File_Format
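If you do roll your own, a minimal sketch of the chunk layout for a mono 16-bit file might look like the following (function names are mine; the only fiddly part is the 80-bit extended float that the COMM chunk uses for the sample rate):
#include <stdio.h>
#include <stdint.h>

static void put_u16be(FILE *f, uint16_t v) { fputc(v >> 8, f); fputc(v & 0xFF, f); }
static void put_u32be(FILE *f, uint32_t v) { put_u16be(f, v >> 16); put_u16be(f, v & 0xFFFF); }

/* The COMM chunk stores the sample rate as an 80-bit IEEE extended float (rate must be > 0). */
static void put_extended80(FILE *f, uint32_t rate) {
    int p = 31;
    while (!(rate & (1u << p))) p--;              /* highest set bit */
    put_u16be(f, (uint16_t)(16383 + p));          /* biased exponent */
    uint64_t mant = (uint64_t)rate << (63 - p);   /* explicit-integer-bit mantissa */
    for (int i = 7; i >= 0; i--) fputc((int)((mant >> (8 * i)) & 0xFF), f);
}

/* Chunk sizes count only the payload, never the 8-byte ID+size header. */
void write_aiff_mono16(FILE *f, const int16_t *samples, uint32_t nFrames, uint32_t rate) {
    uint32_t ssndSize = 8 + nFrames * 2;          /* offset + blockSize + sample data */
    fwrite("FORM", 1, 4, f);
    put_u32be(f, 4 + (8 + 18) + (8 + ssndSize));  /* "AIFF" + COMM + SSND */
    fwrite("AIFF", 1, 4, f);
    fwrite("COMM", 1, 4, f); put_u32be(f, 18);
    put_u16be(f, 1);                              /* channels */
    put_u32be(f, nFrames);                        /* sample frames */
    put_u16be(f, 16);                             /* bits per sample */
    put_extended80(f, rate);
    fwrite("SSND", 1, 4, f); put_u32be(f, ssndSize);
    put_u32be(f, 0); put_u32be(f, 0);             /* offset, blockSize */
    for (uint32_t i = 0; i < nFrames; i++)        /* sample data is big-endian */
        put_u16be(f, (uint16_t)samples[i]);
}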

I was able to write a proper AIFF file. The last bit that was tripping me up was that I used sizeof() for a structure's size, whereas a chunk's size field must omit the first eight bytes (the chunk's ID and size fields). I did use Apple's deprecated AIFF.h header to get the structures, and it seems that neither QuickTime X nor QuickTime 7 reads the metadata I set in it.
You can see my work at PlayerPRO's PlayerPRO 6 branch. It's in a file called PPApp_AppDelegate.m in the function -createAIFFDataFromSettings:data:

Here is some C code that will create an AIFF file using the Apple CoreAudio and AudioToolbox frameworks for macOS.
#include <string.h>
#include <stdio.h>  // printf
#include <stdlib.h> // exit
#include <math.h>
#include <CoreAudio/CoreAudio.h>
#include <CoreAudio/CoreAudioTypes.h>
#include <AudioToolbox/AudioToolbox.h>
#include <AudioToolbox/AudioFile.h>
CFURLRef MakeUrl(const char *cstr);
void CheckError(OSStatus res);
AudioStreamBasicDescription asbd;
AudioFileID audioFile;
OSStatus res;
void CheckError(OSStatus result) {
    if (result == noErr) return;
    switch (result) {
        case kAudioFileUnspecifiedError:
            printf("kAudioFileUnspecifiedError");
            break;
        case kAudioFileUnsupportedFileTypeError:
            printf("kAudioFileUnsupportedFileTypeError");
            break;
        case kAudioFileUnsupportedDataFormatError:
            printf("kAudioFileUnsupportedDataFormatError");
            break;
        case kAudioFileUnsupportedPropertyError:
            printf("kAudioFileUnsupportedPropertyError");
            break;
        case kAudioFileBadPropertySizeError:
            printf("kAudioFileBadPropertySizeError");
            break;
        case kAudioFilePermissionsError:
            printf("kAudioFilePermissionsError");
            break;
        case kAudioFileNotOptimizedError:
            printf("kAudioFileNotOptimizedError");
            break;
        case kAudioFileInvalidChunkError:
            printf("kAudioFileInvalidChunkError");
            break;
        case kAudioFileDoesNotAllow64BitDataSizeError:
            printf("kAudioFileDoesNotAllow64BitDataSizeError");
            break;
        case kAudioFileInvalidPacketOffsetError:
            printf("kAudioFileInvalidPacketOffsetError");
            break;
        case kAudioFileInvalidFileError:
            printf("kAudioFileInvalidFileError");
            break;
        case kAudioFileOperationNotSupportedError:
            printf("kAudioFileOperationNotSupportedError");
            break;
        case kAudioFileNotOpenError:
            printf("kAudioFileNotOpenError");
            break;
        case kAudioFileEndOfFileError:
            printf("kAudioFileEndOfFileError");
            break;
        case kAudioFilePositionError:
            printf("kAudioFilePositionError");
            break;
        case kAudioFileFileNotFoundError:
            printf("kAudioFileFileNotFoundError");
            break;
        default:
            printf("unknown error");
            break;
    }
    exit(result);
}
CFURLRef MakeUrl(const char *cstr) {
    CFStringRef path = CFStringCreateWithCString(0, cstr, kCFStringEncodingUTF8);
    CFURLRef url = CFURLCreateWithFileSystemPath(NULL, path, kCFURLPOSIXPathStyle, false);
    CFRelease(path);
    return url;
}
int main() {
    double sampleRate = 44100.0;
    double duration = 10.0;
    long nSamples = (long)(sampleRate * duration);

    // Format struct for 1 channel, 16 bit PCM audio
    memset(&asbd, 0, sizeof(asbd));
    asbd.mSampleRate = sampleRate;
    asbd.mFormatID = kAudioFormatLinearPCM;
    asbd.mFormatFlags = kAudioFormatFlagIsBigEndian | kAudioFormatFlagIsSignedInteger;
    asbd.mBitsPerChannel = 16;
    asbd.mChannelsPerFrame = 1;
    asbd.mFramesPerPacket = 1;
    asbd.mBytesPerFrame = 2;
    asbd.mBytesPerPacket = 2;

    CFURLRef url = MakeUrl("sinpos.aiff");
    res = AudioFileCreateWithURL(url, kAudioFileAIFFType, &asbd,
                                 kAudioFileFlags_EraseFile, &audioFile);
    CheckError(res);

    UInt32 numBytes = 2;
    int freq = 44; // 100 for approx 440Hz, 2940 for 15Hz, 44 for approx 1000Hz
    for (int i = 0; i < nSamples; i++) {
        int x = (i % freq);
        double angle = 2.0 * M_PI * x / freq;
        double s = 32767.0 * sin(angle);
        SInt16 sample = (SInt16)s;
        sample = OSSwapHostToBigInt16(sample);
        res = AudioFileWriteBytes(audioFile, false, i * 2, &numBytes, &sample);
        CheckError(res);
    }

    res = AudioFileClose(audioFile);
    CheckError(res);
    return 0;
}
The Makefile is as follows:
aiffcreate: aiffcreate.c
	gcc -o $@ $< -framework AudioToolbox -framework CoreFoundation -framework CoreAudio -lm

clean:
	rm *.aiff aiffcreate || true
This can be run by simply issuing ./aiffcreate on the command line; a file named sinpos.aiff will be created, containing a pure tone of approximately 1000Hz lasting 10 seconds.

Related

Why is this simple audio code not creating any sound?

I am learning how to generate sound using Windows and c++. I was trying to use a simple tutorial, but the code only creates silence. What did I do wrong? I tested with some print commands (which I removed) and the code seems to run to completion. Other sounds (from other programs, of course) play fine, and there are no error messages. I think the code looks right, but I am clearly not capable of judging that...
#include <iostream>
#include <cmath>
#include <windows.h>
#include <mmsystem.h>
// Sample rate
const int SAMPLE_RATE = 44100;
// Frequency of the sine wave
const int FREQUENCY = 440;
int main()
{
// Open the default audio device for output
HWAVEOUT hWaveOut = NULL;
waveOutOpen(&hWaveOut, WAVE_MAPPER, NULL, 0, 0, CALLBACK_NULL);
// Create a WAVEHDR structure to describe the waveform data
WAVEHDR waveHdr;
memset(&waveHdr, 0, sizeof(waveHdr));
// Allocate memory for the waveform data
const int BUFFER_SIZE = SAMPLE_RATE * sizeof(short);
short* samples = (short*)malloc(BUFFER_SIZE);
// Generate the sine wave samples
for (int i = 0; i < SAMPLE_RATE; ++i)
{
float t = i / float(SAMPLE_RATE);
float sample = sin(2 * M_PI * FREQUENCY * t);
samples[i] = short(sample * 32767);
}
// Set up the WAVEHDR structure
waveHdr.lpData = (LPSTR)samples;
waveHdr.dwBufferLength = BUFFER_SIZE;
// Prepare the waveform data for playback
waveOutPrepareHeader(hWaveOut, &waveHdr, sizeof(waveHdr));
// Play the waveform data
waveOutWrite(hWaveOut, &waveHdr, sizeof(waveHdr));
// Wait until the sound finishes playing
while (waveHdr.dwFlags & WHDR_DONE != WHDR_DONE)
{
// Do nothing
}
// Clean up
waveOutUnprepareHeader(hWaveOut, &waveHdr, sizeof(waveHdr));
waveOutClose(hWaveOut);
free(samples);
return 0;
}
You need to fill in and pass a WAVEFORMATEX structure before opening and playing audio (more information can be found here: https://learn.microsoft.com/en-us/windows/win32/api/mmeapi/ns-mmeapi-waveformatex ).
WAVEFORMATEX wfex;
wfex.wFormatTag = WAVE_FORMAT_PCM;
wfex.nChannels = 1;
wfex.nSamplesPerSec = 44100;          // samples per second (must be set)
wfex.wBitsPerSample = 16;             // number of bits per sample
wfex.nBlockAlign = 1 * 2;             // numChannels * numBytesPerSample
wfex.nAvgBytesPerSec = 1 * 44100 * 2; // numChannels * numSamplesPerSec * numBytesPerSample
wfex.cbSize = 0;

HWAVEOUT hWaveOut = NULL;
MMRESULT mm = waveOutOpen(&hWaveOut, WAVE_MAPPER, &wfex, 0, 0, CALLBACK_NULL);
if (mm != MMSYSERR_NOERROR) {
    std::string errorMsg = getWaveError(mm);
    // display the message etc
}
In case of an error, you can query the mm variable and display an appropriate message that could help you identify the problem.
std::string getWaveError(MMRESULT mm) {
    std::string errorMsg;
    switch (mm) {
    case MMSYSERR_INVALHANDLE:
        errorMsg = "Wave exception: Invalid handle";
        break;
    case MMSYSERR_NOMEM:
        errorMsg = "Wave exception: Not enough memory";
        break;
    case MMSYSERR_NODRIVER:
        errorMsg = "Wave exception: No driver present";
        break;
    case WAVERR_UNPREPARED:
        errorMsg = "Wave exception: Unprepared buffer";
        break;
    case MMSYSERR_HANDLEBUSY:
        errorMsg = "Wave exception: Handle busy";
        break;
    case MMSYSERR_ALLOCATED:
        errorMsg = "Wave exception: Device already allocated";
        break;
    case WAVERR_BADFORMAT:
        errorMsg = "Wave exception: Unsupported wave format";
        break;
    default:
        errorMsg = "Wave exception: Unknown error: Code " + std::to_string(mm);
        break;
    }
    return errorMsg;
}
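One more hedged observation about the original code: the wait loop while (waveHdr.dwFlags & WHDR_DONE != WHDR_DONE) parses as waveHdr.dwFlags & (WHDR_DONE != WHDR_DONE) because != binds tighter than &, so the condition is always 0 and the program tears everything down before a single sample has played. A parenthesized (and less CPU-hungry) version:
// Wait until the driver marks the header as done
while ((waveHdr.dwFlags & WHDR_DONE) == 0)
{
    Sleep(10); // yield the CPU instead of busy-waiting
}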
Hope this helps.

Is there a way to speed up audio .wav file using some SDL_Mixer's function?

I am making a simple game whose audio speed should increase as the player approaches the end of the level being played. So now I was wondering if there is a way to do this using SDL_Mixer. If SDL_Mixer is not the way to go, could you please tell me how I could change the audio file itself to make it faster? I am working with an 8-bit .wav file with 2 channels at a sample rate of 22050 Hz.
According to this forum here: https://forums.libsdl.org/viewtopic.php?p=44663, you can use a different library called "SoLoud" to change the playback speed of your sounds on the fly. You can get/see more details on SoLoud here: http://sol.gfxile.net/soloud/. From what I can tell, you cannot do this using SDL2, and SoLoud seems easy enough to use, so that would be my suggestion.
A few years back I was trying to achieve something very similar and, after a lot of web searching, I came up with this solution, which involves using the Mix_RegisterEffect function, and it got close:
#include <SDL2/SDL.h>
#include <SDL2/SDL_mixer.h>
#include <iostream>
#include <cstdlib>
#include <cmath>
/* global vars */
Uint16 audioFormat; // current audio format constant
int audioFrequency, // frequency rate of the current audio format
audioChannelCount, // number of channels of the current audio format
audioAllocatedMixChannelsCount; // number of mix channels allocated
static inline Uint16 formatSampleSize(Uint16 format)
{
return (format & 0xFF) / 8;
}
// Get chunk time length (in ms) given its size and current audio format
static int computeChunkLengthMillisec(int chunkSize)
{
/* bytes / samplesize == sample points */
const Uint32 points = chunkSize / formatSampleSize(audioFormat);
/* sample points / channels == sample frames */
const Uint32 frames = (points / audioChannelCount);
/* (sample frames * 1000) / frequency == play length, in ms */
return ((frames * 1000) / audioFrequency);
}
// Custom handler object to control which part of the Mix_Chunk's audio data will be played, with which pitch-related modifications.
// This needed to be a template because the actual Mix_Chunk's data format may vary (AUDIO_U8, AUDIO_S16, etc) and the data type varies with it (Uint8, Sint16, etc)
// The AudioFormatType should be the data type that is compatible with the current SDL_mixer-initialized audio format.
template<typename AudioFormatType>
struct PlaybackSpeedEffectHandler
{
const AudioFormatType* const chunkData; // pointer to the chunk sample data (as array)
const float& speedFactor; // the playback speed factor
int position; // current position of the sound, in ms
const int duration; // the duration of the sound, in ms
const int chunkSize; // the size of the sound, as a number of indexes (or sample points); think of this as an array length when using the proper array type (instead of just Uint8*).
const bool loop; // flags whether playback should stay looping
const bool attemptSelfHalting; // flags whether playback should be halted by this callback when playback is finished
bool altered; // true if this playback has been pitched by this handler
PlaybackSpeedEffectHandler(const Mix_Chunk& chunk, const float& speed, bool loop, bool trySelfHalt)
: chunkData(reinterpret_cast<AudioFormatType*>(chunk.abuf)), speedFactor(speed),
position(0), duration(computeChunkLengthMillisec(chunk.alen)),
chunkSize(chunk.alen / formatSampleSize(audioFormat)),
loop(loop), attemptSelfHalting(trySelfHalt), altered(false)
{}
// processing function to be able to change chunk speed/pitch.
void modifyStreamPlaybackSpeed(int mixChannel, void* stream, int length)
{
AudioFormatType* buffer = static_cast<AudioFormatType*>(stream);
const int bufferSize = length / sizeof(AudioFormatType); // buffer size (as array)
const int bufferDuration = computeChunkLengthMillisec(length); // buffer time duration
const float speedFactor = this->speedFactor; // take a "snapshot" of speed factor
// if there is still sound to be played
if(position < duration || loop)
{
// if playback is unaltered and pitch is required (for the first time)
if(!altered && speedFactor != 1.0f)
altered = true; // flags playback modification and proceed to the pitch routine.
if(altered) // if unaltered, this pitch routine is skipped
{
const float delta = 1000.0/audioFrequency, // normal duration of each sample
vdelta = delta*speedFactor; // virtual stretched duration, scaled by 'speedFactor'
for(int i = 0; i < bufferSize; i += audioChannelCount)
{
const int j = i/audioChannelCount; // j goes from 0 to size/channelCount, incremented 1 by 1
const float x = position + j*vdelta; // get "virtual" index. its corresponding value will be interpolated.
const int k = floor(x / delta); // get left index to interpolate from original chunk data (right index will be this plus 1)
const float proportion = (x / delta) - k; // get the proportion of the right value (left will be 1.0 minus this)
// usually just 2 channels: 0 (left) and 1 (right), but who knows...
for(int c = 0; c < audioChannelCount; c++)
{
// check if k will be within bounds
if(k*audioChannelCount + audioChannelCount - 1 < chunkSize || loop)
{
AudioFormatType leftValue = chunkData[( k * audioChannelCount + c) % chunkSize],
rightValue = chunkData[((k+1) * audioChannelCount + c) % chunkSize];
// put interpolated value on 'data' (linear interpolation)
buffer[i + c] = (1-proportion)*leftValue + proportion*rightValue;
}
else // if k will be out of bounds (chunk bounds), it means we already finished; thus, we'll pass silence
{
buffer[i + c] = 0;
}
}
}
}
// update position
position += bufferDuration * speedFactor; // this is not exact, since a frame may play for less than its full duration when playback finishes, but it's simpler
// reset position if looping
if(loop) while(position > duration)
position -= duration;
}
else // if we already played the whole sound but finished earlier than expected by SDL_mixer (due to faster playback speed)
{
// set silence on the buffer since Mix_HaltChannel() poops out some of it for a few ms.
for(int i = 0; i < bufferSize; i++)
buffer[i] = 0;
if(attemptSelfHalting)
Mix_HaltChannel(mixChannel); // XXX unsafe call, since it locks audio; but no safer solution was found yet...
}
}
// Mix_EffectFunc_t callback that redirects to handler method (handler passed via userData)
static void mixEffectFuncCallback(int channel, void* stream, int length, void* userData)
{
static_cast<PlaybackSpeedEffectHandler*>(userData)->modifyStreamPlaybackSpeed(channel, stream, length);
}
// Mix_EffectDone_t callback that deletes the handler at the end of the effect usage (handler passed via userData)
static void mixEffectDoneCallback(int, void *userData)
{
delete static_cast<PlaybackSpeedEffectHandler*>(userData);
}
// function to register a handler to this channel for the next playback.
static void registerEffect(int channel, const Mix_Chunk& chunk, const float& speed, bool loop, bool trySelfHalt)
{
Mix_RegisterEffect(channel, mixEffectFuncCallback, mixEffectDoneCallback, new PlaybackSpeedEffectHandler(chunk, speed, loop, trySelfHalt));
}
};
// Register playback speed effect handler according to the current audio format; effect valid for a single playback; if playback is looped, lasts until it's halted
void setupPlaybackSpeedEffect(const Mix_Chunk* const chunk, const float& speed, int channel, bool loop=false, bool trySelfHalt=false)
{
// select the register function for the current audio format and register the effect using the compatible handlers
// XXX is it correct to behave the same way to all S16 and U16 formats? Should we create case statements for AUDIO_S16SYS, AUDIO_S16LSB, AUDIO_S16MSB, etc, individually?
switch(audioFormat)
{
case AUDIO_U8: PlaybackSpeedEffectHandler<Uint8 >::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
case AUDIO_S8: PlaybackSpeedEffectHandler<Sint8 >::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
case AUDIO_U16: PlaybackSpeedEffectHandler<Uint16>::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
default:
case AUDIO_S16: PlaybackSpeedEffectHandler<Sint16>::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
case AUDIO_S32: PlaybackSpeedEffectHandler<Sint32>::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
case AUDIO_F32: PlaybackSpeedEffectHandler<float >::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
}
}
// example
// run the executable passing a filename of a sound file that SDL_mixer is able to open (ogg, wav, ...)
int main(int argc, char** argv)
{
if(argc < 2) { std::cout << "missing argument" << std::endl; return 0; }
SDL_Init(SDL_INIT_AUDIO);
Mix_OpenAudio(MIX_DEFAULT_FREQUENCY, MIX_DEFAULT_FORMAT, MIX_DEFAULT_CHANNELS, 4096);
Mix_QuerySpec(&audioFrequency, &audioFormat, &audioChannelCount); // query specs
audioAllocatedMixChannelsCount = Mix_AllocateChannels(MIX_CHANNELS);
float speed = 1.0;
Mix_Chunk* chunk = Mix_LoadWAV(argv[1]);
if(chunk != NULL)
{
const int channel = Mix_PlayChannelTimed(-1, chunk, -1, 8000);
setupPlaybackSpeedEffect(chunk, speed, channel, true);
// loop for 8 seconds, changing the pitch dynamically
while(SDL_GetTicks() < 8000)
speed = 1 + 0.25*sin(0.001*SDL_GetTicks());
}
else
std::cout << "no data" << std::endl;
Mix_FreeChunk(chunk);
Mix_CloseAudio();
Mix_Quit();
SDL_Quit();
return EXIT_SUCCESS;
}
While this works, it's not a perfect solution, since in most cases the result has some artifacts (crackling) that I wasn't able to track down.
I created a GitHub gist for this a while ago.

C++ FFmpeg distorted sound when converting audio

I'm using the FFmpeg library to generate MP4 files containing audio from various files, such as MP3, WAV, OGG, but I'm having some troubles (I'm also putting video in there, but for simplicity's sake I'm omitting that for this question, since I've got that working). My current code opens an audio file, decodes the content and converts it into the MP4 container and finally writes it into the destination file as interleaved frames.
It works perfectly for most MP3 files, but when inputting WAV or OGG, the audio in the resulting MP4 is slightly distorted and often plays at the wrong speed (up to many times faster or slower).
I've looked at countless examples of how to use the conversion functions (swr_convert), but I can't seem to get rid of the noise in the exported audio.
Here's how I add an audio stream to the MP4 (outContext is the AVFormatContext for the output file):
audioCodec = avcodec_find_encoder(outContext->oformat->audio_codec);
if (!audioCodec)
die("Could not find audio encoder!");
// Start stream
audioStream = avformat_new_stream(outContext, audioCodec);
if (!audioStream)
die("Could not allocate audio stream!");
audioCodecContext = audioStream->codec;
audioStream->id = 1;
// Setup
audioCodecContext->sample_fmt = AV_SAMPLE_FMT_S16;
audioCodecContext->bit_rate = 128000;
audioCodecContext->sample_rate = 44100;
audioCodecContext->channels = 2;
audioCodecContext->channel_layout = AV_CH_LAYOUT_STEREO;
// Open the codec
if (avcodec_open2(audioCodecContext, audioCodec, NULL) < 0)
die("Could not open audio codec");
And to open a sound file from MP3/WAV/OGG (from the filename variable)...
// Create context
formatContext = avformat_alloc_context();
if (avformat_open_input(&formatContext, filename, NULL, NULL)<0)
die("Could not open file");
// Find info
if (avformat_find_stream_info(formatContext, 0)<0)
die("Could not find file info");
av_dump_format(formatContext, 0, filename, false);
// Find audio stream
streamId = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
if (streamId < 0)
die("Could not find Audio Stream");
codecContext = formatContext->streams[streamId]->codec;
// Find decoder
codec = avcodec_find_decoder(codecContext->codec_id);
if (codec == NULL)
die("cannot find codec!");
// Open codec
if (avcodec_open2(codecContext, codec, 0)<0)
die("Codec cannot be found");
// Set up resample context
swrContext = swr_alloc();
if (!swrContext)
die("Failed to alloc swr context");
av_opt_set_int(swrContext, "in_channel_count", codecContext->channels, 0);
av_opt_set_int(swrContext, "in_channel_layout", codecContext->channel_layout, 0);
av_opt_set_int(swrContext, "in_sample_rate", codecContext->sample_rate, 0);
av_opt_set_sample_fmt(swrContext, "in_sample_fmt", codecContext->sample_fmt, 0);
av_opt_set_int(swrContext, "out_channel_count", audioCodecContext->channels, 0);
av_opt_set_int(swrContext, "out_channel_layout", audioCodecContext->channel_layout, 0);
av_opt_set_int(swrContext, "out_sample_rate", audioCodecContext->sample_rate, 0);
av_opt_set_sample_fmt(swrContext, "out_sample_fmt", audioCodecContext->sample_fmt, 0);
if (swr_init(swrContext))
die("Failed to init swr context");
Finally, to decode+convert+encode...
// Allocate and init re-usable frames
audioFrameDecoded = av_frame_alloc();
if (!audioFrameDecoded)
die("Could not allocate audio frame");
audioFrameDecoded->format = fileCodecContext->sample_fmt;
audioFrameDecoded->channel_layout = fileCodecContext->channel_layout;
audioFrameDecoded->channels = fileCodecContext->channels;
audioFrameDecoded->sample_rate = fileCodecContext->sample_rate;
audioFrameConverted = av_frame_alloc();
if (!audioFrameConverted)
die("Could not allocate audio frame");
audioFrameConverted->nb_samples = audioCodecContext->frame_size;
audioFrameConverted->format = audioCodecContext->sample_fmt;
audioFrameConverted->channel_layout = audioCodecContext->channel_layout;
audioFrameConverted->channels = audioCodecContext->channels;
audioFrameConverted->sample_rate = audioCodecContext->sample_rate;
AVPacket inPacket;
av_init_packet(&inPacket);
inPacket.data = NULL;
inPacket.size = 0;
int frameFinished = 0;
while (av_read_frame(formatContext, &inPacket) >= 0) {
if (inPacket.stream_index == streamId) {
int len = avcodec_decode_audio4(fileCodecContext, audioFrameDecoded, &frameFinished, &inPacket);
if (frameFinished) {
// Convert
uint8_t *convertedData=NULL;
if (av_samples_alloc(&convertedData,
NULL,
audioCodecContext->channels,
audioFrameConverted->nb_samples,
audioCodecContext->sample_fmt, 0) < 0)
die("Could not allocate samples");
int outSamples = swr_convert(swrContext,
&convertedData,
audioFrameConverted->nb_samples,
(const uint8_t **)audioFrameDecoded->data,
audioFrameDecoded->nb_samples);
if (outSamples < 0)
die("Could not convert");
size_t buffer_size = av_samples_get_buffer_size(NULL,
audioCodecContext->channels,
audioFrameConverted->nb_samples,
audioCodecContext->sample_fmt,
0);
if (buffer_size < 0)
die("Invalid buffer size");
if (avcodec_fill_audio_frame(audioFrameConverted,
audioCodecContext->channels,
audioCodecContext->sample_fmt,
convertedData,
buffer_size,
0) < 0)
die("Could not fill frame");
AVPacket outPacket;
av_init_packet(&outPacket);
outPacket.data = NULL;
outPacket.size = 0;
if (avcodec_encode_audio2(audioCodecContext, &outPacket, audioFrameConverted, &frameFinished) < 0)
die("Error encoding audio frame");
if (frameFinished) {
outPacket.stream_index = audioStream->index;
if (av_interleaved_write_frame(outContext, &outPacket) != 0)
die("Error while writing audio frame");
av_free_packet(&outPacket);
}
}
}
}
av_frame_free(&audioFrameConverted);
av_frame_free(&audioFrameDecoded);
av_free_packet(&inPacket);
I have also tried setting appropriate pts values for outgoing frames, but that doesn't seem to affect the sound quality at all.
I'm also unsure how/if I should be allocating the converted data, can av_samples_alloc be used for this? What about avcodec_fill_audio_frame? Am I on the right track?
Any input is appreciated (I can also send the exported MP4s if necessary, if you want to hear the distortion).
if (avcodec_encode_audio2(audioCodecContext, &outPacket, audioFrameConverted, &frameFinished) < 0)
die("Error encoding audio frame");
You seem to be assuming that the encoder will eat all submitted samples - it doesn't. It also doesn't cache them internally. It will eat a specific number of samples (AVCodecContext.frame_size), and the rest should be resubmitted in the next call to avcodec_encode_audio2().
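One hedged way to handle that resubmission is to buffer converted samples in libavutil's AVAudioFifo and encode only once a full frame_size has accumulated; a rough sketch using the variable names from the question (and assuming audioFrameConverted's buffers have been allocated, e.g. with av_frame_get_buffer()):
#include "libavutil/audio_fifo.h"

/* allocate once, after the encoder is opened */
AVAudioFifo *fifo = av_audio_fifo_alloc(audioCodecContext->sample_fmt,
                                        audioCodecContext->channels, 1);

/* after each swr_convert(): stash whatever came out */
av_audio_fifo_write(fifo, (void **)&convertedData, outSamples);

/* drain in encoder-sized chunks */
while (av_audio_fifo_size(fifo) >= audioCodecContext->frame_size) {
    av_audio_fifo_read(fifo, (void **)audioFrameConverted->data,
                       audioCodecContext->frame_size);
    /* encode audioFrameConverted exactly as in the main loop */
}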
[edit]
ok, so your edited code is better, but not there yet. You're still assuming the decoder will output at least frame_size samples for each call to avcodec_decode_audioN() (after resampling), which may not be the case. If that happens (and it does, for ogg), your avcodec_encode_audioN() call will encode an incomplete input buffer (because you say it's got frame_size samples, but it doesn't). Likewise, your code also doesn't deal with cases where the decoder outputs a number significantly bigger than frame_size (like 10*frame_size) expected by the encoder, in which case you'll get overruns - basically your 1:1 decode/encode mapping is the main source of your problem.
As a solution, consider the swrContext a FIFO, where you input all decoded samples and loop over it until it has fewer than frame_size samples left. I'll leave it up to you to learn how to deal with end-of-stream, because you'll need to flush cached samples out of the decoder (by calling avcodec_decode_audioN() with an AVPacket where .data = NULL and .size = 0), flush the swrContext (by calling swr_convert() until it returns 0), and flush the encoder (by feeding it NULL AVFrames until it returns an AVPacket with .size = 0). Right now you'll probably get an output file where the end is slightly truncated. That shouldn't be hard to figure out.
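A rough, untested sketch of that end-of-stream flushing, using the same old-style API and variable names as the code in the question:
/* 1. Drain the decoder with an empty packet */
AVPacket flushPacket;
av_init_packet(&flushPacket);
flushPacket.data = NULL;
flushPacket.size = 0;
int gotFrame = 1;
while (gotFrame) {
    avcodec_decode_audio4(fileCodecContext, audioFrameDecoded, &gotFrame, &flushPacket);
    if (gotFrame)
        swr_convert(swrContext, NULL, 0,
                    (const uint8_t **)audioFrameDecoded->data,
                    audioFrameDecoded->nb_samples);
}

/* 2. Drain the resampler: request output with no new input until it runs dry */
while (swr_convert(swrContext, &convertedData,
                   audioFrameConverted->nb_samples, NULL, 0) > 0) {
    /* fill audioFrameConverted from convertedData and encode it,
       exactly as in the main loop */
}

/* 3. Drain the encoder: feed NULL frames until no packet comes back */
int gotPacket = 1;
while (gotPacket) {
    AVPacket outPacket;
    av_init_packet(&outPacket);
    outPacket.data = NULL;
    outPacket.size = 0;
    avcodec_encode_audio2(audioCodecContext, &outPacket, NULL, &gotPacket);
    if (gotPacket) {
        outPacket.stream_index = audioStream->index;
        av_interleaved_write_frame(outContext, &outPacket);
        av_free_packet(&outPacket);
    }
}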
This code works for me for m4a/ogg/mp3 to m4a/aac conversion:
#include "libswresample/swresample.h"
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libavutil/opt.h"
#include <stdio.h>
#include <stdlib.h>
static void die(char *str) {
fprintf(stderr, "%s\n", str);
exit(1);
}
static AVStream *add_audio_stream(AVFormatContext *oc, enum AVCodecID codec_id)
{
AVCodecContext *c;
AVCodec *encoder = avcodec_find_encoder(codec_id);
AVStream *st = avformat_new_stream(oc, encoder);
if (!st) die("av_new_stream");
c = st->codec;
c->codec_id = codec_id;
c->codec_type = AVMEDIA_TYPE_AUDIO;
/* put sample parameters */
c->bit_rate = 64000;
c->sample_rate = 44100;
c->channels = 2;
c->sample_fmt = encoder->sample_fmts[0];
c->channel_layout = AV_CH_LAYOUT_STEREO;
// some formats want stream headers to be separate
if(oc->oformat->flags & AVFMT_GLOBALHEADER)
c->flags |= CODEC_FLAG_GLOBAL_HEADER;
return st;
}
static void open_audio(AVFormatContext *oc, AVStream *st)
{
AVCodecContext *c = st->codec;
AVCodec *codec;
/* find the audio encoder */
codec = avcodec_find_encoder(c->codec_id);
if (!codec) die("avcodec_find_encoder");
/* open it */
AVDictionary *dict = NULL;
av_dict_set(&dict, "strict", "+experimental", 0);
int res = avcodec_open2(c, codec, &dict);
if (res < 0) die("avcodec_open");
}
int main(int argc, char *argv[]) {
av_register_all();
if (argc != 3) {
fprintf(stderr, "%s <in> <out>\n", argv[0]);
exit(1);
}
// Allocate and init re-usable frames
AVCodecContext *fileCodecContext, *audioCodecContext;
AVFormatContext *formatContext = NULL, *outContext;
AVStream *audioStream;
SwrContext *swrContext;
int streamId;
// input file
const char *file = argv[1];
int res = avformat_open_input(&formatContext, file, NULL, NULL);
if (res != 0) die("avformat_open_input");
res = avformat_find_stream_info(formatContext, NULL);
if (res < 0) die("avformat_find_stream_info");
AVCodec *codec;
res = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);
if (res < 0) die("av_find_best_stream");
streamId = res;
fileCodecContext = avcodec_alloc_context3(codec);
avcodec_copy_context(fileCodecContext, formatContext->streams[streamId]->codec);
res = avcodec_open2(fileCodecContext, codec, NULL);
if (res < 0) die("avcodec_open2");
// output file
const char *outfile = argv[2];
AVOutputFormat *fmt = av_guess_format(NULL, outfile, NULL);
if (!fmt) die("av_guess_format");
outContext = avformat_alloc_context();
outContext->oformat = fmt;
audioStream = add_audio_stream(outContext, fmt->audio_codec);
open_audio(outContext, audioStream);
res = avio_open2(&outContext->pb, outfile, AVIO_FLAG_WRITE, NULL, NULL);
if (res < 0) die("url_fopen");
avformat_write_header(outContext, NULL);
audioCodecContext = audioStream->codec;
// resampling
swrContext = swr_alloc();
av_opt_set_channel_layout(swrContext, "in_channel_layout", fileCodecContext->channel_layout, 0);
av_opt_set_channel_layout(swrContext, "out_channel_layout", audioCodecContext->channel_layout, 0);
av_opt_set_int(swrContext, "in_sample_rate", fileCodecContext->sample_rate, 0);
av_opt_set_int(swrContext, "out_sample_rate", audioCodecContext->sample_rate, 0);
av_opt_set_sample_fmt(swrContext, "in_sample_fmt", fileCodecContext->sample_fmt, 0);
av_opt_set_sample_fmt(swrContext, "out_sample_fmt", audioCodecContext->sample_fmt, 0);
res = swr_init(swrContext);
if (res < 0) die("swr_init");
AVFrame *audioFrameDecoded = av_frame_alloc();
if (!audioFrameDecoded)
die("Could not allocate audio frame");
audioFrameDecoded->format = fileCodecContext->sample_fmt;
audioFrameDecoded->channel_layout = fileCodecContext->channel_layout;
audioFrameDecoded->channels = fileCodecContext->channels;
audioFrameDecoded->sample_rate = fileCodecContext->sample_rate;
AVFrame *audioFrameConverted = av_frame_alloc();
if (!audioFrameConverted) die("Could not allocate audio frame");
audioFrameConverted->nb_samples = audioCodecContext->frame_size;
audioFrameConverted->format = audioCodecContext->sample_fmt;
audioFrameConverted->channel_layout = audioCodecContext->channel_layout;
audioFrameConverted->channels = audioCodecContext->channels;
audioFrameConverted->sample_rate = audioCodecContext->sample_rate;
AVPacket inPacket;
av_init_packet(&inPacket);
inPacket.data = NULL;
inPacket.size = 0;
int frameFinished = 0;
while (av_read_frame(formatContext, &inPacket) >= 0) {
if (inPacket.stream_index == streamId) {
int len = avcodec_decode_audio4(fileCodecContext, audioFrameDecoded, &frameFinished, &inPacket);
if (frameFinished) {
// Convert
uint8_t *convertedData=NULL;
if (av_samples_alloc(&convertedData,
NULL,
audioCodecContext->channels,
audioFrameConverted->nb_samples,
audioCodecContext->sample_fmt, 0) < 0)
die("Could not allocate samples");
int outSamples = swr_convert(swrContext, NULL, 0,
//&convertedData,
//audioFrameConverted->nb_samples,
(const uint8_t **)audioFrameDecoded->data,
audioFrameDecoded->nb_samples);
if (outSamples < 0) die("Could not convert");
for (;;) {
outSamples = swr_get_out_samples(swrContext, 0);
if (outSamples < audioCodecContext->frame_size * audioCodecContext->channels) break; // see comments, thanks to @dajuric for fixing this
outSamples = swr_convert(swrContext,
&convertedData,
audioFrameConverted->nb_samples, NULL, 0);
size_t buffer_size = av_samples_get_buffer_size(NULL,
audioCodecContext->channels,
audioFrameConverted->nb_samples,
audioCodecContext->sample_fmt,
0);
if (buffer_size < 0) die("Invalid buffer size");
if (avcodec_fill_audio_frame(audioFrameConverted,
audioCodecContext->channels,
audioCodecContext->sample_fmt,
convertedData,
buffer_size,
0) < 0)
die("Could not fill frame");
AVPacket outPacket;
av_init_packet(&outPacket);
outPacket.data = NULL;
outPacket.size = 0;
if (avcodec_encode_audio2(audioCodecContext, &outPacket, audioFrameConverted, &frameFinished) < 0)
die("Error encoding audio frame");
if (frameFinished) {
outPacket.stream_index = audioStream->index;
if (av_interleaved_write_frame(outContext, &outPacket) != 0)
die("Error while writing audio frame");
av_free_packet(&outPacket);
}
}
}
}
}
swr_close(swrContext);
swr_free(&swrContext);
av_frame_free(&audioFrameConverted);
av_frame_free(&audioFrameDecoded);
av_free_packet(&inPacket);
av_write_trailer(outContext);
avio_close(outContext->pb);
avcodec_close(fileCodecContext);
avcodec_free_context(&fileCodecContext);
avformat_close_input(&formatContext);
return 0;
}
I wanted to include a couple things I found when I was working with the above code.
I had one file get stuck in an infinite loop. The reason was that the file had a sample rate of 48000 and the code changes it to 44100, so there were always extra outSamples left over, and swr_convert would not grab them. I ended up changing add_audio_stream to match the input stream's sample rate:
c->sample_rate = fileCodecContext->sample_rate;
I also had to produce WAV files as my output, and those report a frame_size of 0, so I just chose a number; after a few tests I went with 32. I noticed that if I went too big (e.g. 128) I would get audio glitches.
if (audioFrameConverted->nb_samples <= 0) audioFrameConverted->nb_samples = 32; // wav files report a frame_size of 0
I also changed the if statement that breaks out of the loop so that it checks nb_samples when frame_size is 0:
if ((outSamples < audioCodecContext->frame_size * audioCodecContext->channels) || (audioCodecContext->frame_size == 0 && outSamples < audioFrameConverted->nb_samples * audioCodecContext->channels)) break; // see comments, thanks to @dajuric for fixing this
There was also a glitch when I was testing outputting to ogg files where the timestamp data was missing so the file wouldn't play correctly in vlc. There were a few lines I added that helped with that.
out_audioStream->time_base = in_audioStream->time_base; // entered before avio_open.
outPacket.dts = audioFrameDecoded->pkt_dts;//rest after avcodec_encode_audio2
outPacket.pts = audioFrameDecoded->pkt_pts;
av_packet_rescale_ts(&outPacket, in_audioStream->time_base, out_audioStream->time_base);
The variable names might be a little different since I converted the code to C#. Thought this might help someone.
Actually, swr_convert won't work for that; try swr_convert_frame instead.

Does OpenAL-Soft have an upper limit on the number of sources?

I'm using OpenAL-Soft for a project, and right now I'm trying to decide whether I need to implement OpenAL source pooling.
Source pooling is somewhat cumbersome (I need to write code to "allocate" sources, as well as somehow decide when they can be "freed"), but necessary if the number of sources that can be generated by OpenAL is limited.
Since OpenAL-Soft is a software implementation of the OpenAL API, I wonder if the number of sources it can generate is actually limited by the underlying hardware. Theoretically, since all mixing is done in software, there might be no need to actually use one hardware channel per source.
However, I'm not sure about it. How should I proceed?
It appears that OpenAL-Soft indeed does have an upper limit on the number of sources, which can be defined in a config file. The default seems to be 256. It makes sense to limit the number of sources because of the associated CPU and memory costs. Looks like I'll end up implementing a source pool after all.
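If you'd rather measure the limit than trust the config default, a brute-force probe is straightforward; this is a hedged sketch that assumes a device and context are already set up:
#include <stdio.h>
#ifdef __APPLE__
#include <OpenAL/al.h>
#else
#include <AL/al.h>
#endif

#define MAX_PROBE 4096

/* generate sources until the implementation refuses, then release them all */
int probe_source_limit(void) {
    static ALuint sources[MAX_PROBE];
    int count = 0;
    alGetError(); /* clear any stale error */
    while (count < MAX_PROBE) {
        ALuint s;
        alGenSources(1, &s);
        if (alGetError() != AL_NO_ERROR) break;
        sources[count++] = s;
    }
    alDeleteSources(count, sources);
    printf("this implementation granted %d sources\n", count);
    return count;
}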
I just took a peek at its header and did not see anything pop out.
Here is working code which synthesizes and then renders an audio buffer; you could experiment with it to see whether it accommodates the number of sources you need.
// gcc -o openal_play_wed openal_play_wed.c -lopenal -lm
#include <stdio.h>
#include <stdlib.h> // gives malloc
#include <math.h>
#include <unistd.h> // gives sleep
#ifdef __APPLE__
#include <OpenAL/al.h>
#include <OpenAL/alc.h>
#elif __linux
#include <AL/al.h>
#include <AL/alc.h>
#endif
ALCdevice * openal_output_device;
ALCcontext * openal_output_context;
ALuint internal_buffer;
ALuint streaming_source[1];
int al_check_error(const char * given_label) {
ALenum al_error;
al_error = alGetError();
if(AL_NO_ERROR != al_error) {
printf("ERROR - %s (%s)\n", alGetString(al_error), given_label);
return al_error;
}
return 0;
}
void MM_init_al() {
const char * defname = alcGetString(NULL, ALC_DEFAULT_DEVICE_SPECIFIER);
openal_output_device = alcOpenDevice(defname);
openal_output_context = alcCreateContext(openal_output_device, NULL);
alcMakeContextCurrent(openal_output_context);
// setup buffer and source
alGenBuffers(1, & internal_buffer);
al_check_error("failed call to alGenBuffers");
}
void MM_exit_al() {
    ALenum errorCode = 0;
    // Stop the sources
    alSourceStopv(1, &streaming_source[0]); // streaming_source
    int ii;
    for (ii = 0; ii < 1; ++ii) {
        alSourcei(streaming_source[ii], AL_BUFFER, 0);
    }
    // Clean-up
    alDeleteSources(1, &streaming_source[0]);
    alDeleteBuffers(1, &internal_buffer);
    errorCode = alGetError();
    alcMakeContextCurrent(NULL);
    errorCode = alGetError();
    alcDestroyContext(openal_output_context);
    alcCloseDevice(openal_output_device);
}
void MM_render_one_buffer() {
/* Fill buffer with Sine-Wave */
// float freq = 440.f;
float freq = 100.f;
float incr_freq = 0.1f;
int seconds = 4;
// unsigned sample_rate = 22050;
unsigned sample_rate = 44100;
double my_pi = 3.14159;
size_t buf_size = seconds * sample_rate;
short * samples = malloc(sizeof(short) * buf_size);
printf("\nhere is freq %f\n", freq);
int i=0;
for(; i<buf_size; ++i) {
samples[i] = 32760 * sin( (2.f * my_pi * freq)/sample_rate * i );
freq += incr_freq;
// incr_freq += incr_freq;
// freq *= factor_freq;
if (100.0 > freq || freq > 5000.0) {
incr_freq *= -1.0f;
}
}
/* upload buffer to OpenAL; the size argument is in bytes, hence * sizeof(short) */
alBufferData(internal_buffer, AL_FORMAT_MONO16, samples, buf_size * sizeof(short), sample_rate);
al_check_error("populating alBufferData");
free(samples);
/* Set-up sound source and play buffer */
// ALuint src = 0;
// alGenSources(1, &src);
// alSourcei(src, AL_BUFFER, internal_buffer);
alGenSources(1, & streaming_source[0]);
alSourcei(streaming_source[0], AL_BUFFER, internal_buffer);
// alSourcePlay(src);
alSourcePlay(streaming_source[0]);
// ---------------------
ALenum current_playing_state;
alGetSourcei(streaming_source[0], AL_SOURCE_STATE, & current_playing_state);
al_check_error("alGetSourcei AL_SOURCE_STATE");
while (AL_PLAYING == current_playing_state) {
printf("still playing ... so sleep\n");
sleep(1); // should use a thread sleep NOT sleep() for a more responsive finish
alGetSourcei(streaming_source[0], AL_SOURCE_STATE, & current_playing_state);
al_check_error("alGetSourcei AL_SOURCE_STATE");
}
printf("end of playing\n");
/* Dealloc OpenAL */
MM_exit_al();
} // MM_render_one_buffer
int main() {
MM_init_al();
MM_render_one_buffer();
}

how to convert byte* into jpeg file in VC++

How do I convert a BYTE* buffer into a JPEG file in VC++?
I am capturing video samples and writing them out as BMP files, but I want to write the video samples to JPEG files, using MFC support in ATL COM.
Use libjpeg. Download it from: http://www.ijg.org/
From what it appears, you have the image data in a buffer pointed to by a byte object. Note that the type actually is BYTE (all uppercase). If the data is in JPEG format already, why don't you write that data out to a file (with a suitable '.jpg' or '.jpeg' extension) and try loading it with an image editor? Otherwise, you will need to decode it to a raw format and encode that as JPEG.
Or, you need to explain your problem in more detail, preferably with some code.
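For the already-encoded case, a hedged sketch (pSample and cbSize are hypothetical names for your buffer and its byte count):
#include <cstdio>

void dumpJpegBuffer(const BYTE* pSample, size_t cbSize)
{
    FILE* f = fopen("frame.jpg", "wb"); // the buffer already holds a JPEG stream
    if (f) {
        fwrite(pSample, 1, cbSize, f);
        fclose(f);
    }
}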
Converting raw image data to JPEG can be achieved with ImageMagick.
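For instance, a hedged Magick++ sketch, assuming the buffer holds raw packed 24-bit RGB of known dimensions:
#include <Magick++.h>

void saveRawAsJpeg(const BYTE* buffer, int width, int height)
{
    Magick::InitializeMagick(nullptr);
    // interpret the buffer as packed 8-bit-per-channel RGB
    Magick::Image image(width, height, "RGB", Magick::CharPixel, buffer);
    image.quality(90);
    image.write("frame.jpg");
}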
You may also try the CxImage C++ class to save your stills to a JPEG-encoded file.
There are some more Windows API oriented alternatives available on CodeProject, for instance CMiniJpegEncoder
It is even possible to render a JPEG file from a Windows bitmap using the libgd library, if it is compiled with libjpeg support. Here is the code of a small extension function, gdImageTrueColorAttachBuffer, that I developed for this purpose some time ago:
// libgd extension by Mateusz Loskot <mateusz at loskot dot net>
// Originally developed for Windows CE to enable direct drawing
// on Windows API Device Context using libgd API.
// Complete example available in libgd CVS:
// http://cvs.php.net/viewvc.cgi/gd/libgd/examples/windows.c?diff_format=u&revision=1.1&view=markup
//
gdImagePtr gdImageTrueColorAttachBuffer(int* buffer, int sx, int sy, int stride)
{
int i;
int height;
int* rowptr;
gdImagePtr im;
im = (gdImage *) malloc (sizeof (gdImage));
if (!im) {
return 0;
}
memset (im, 0, sizeof (gdImage));
#if 0
if (overflow2(sizeof (int *), sy)) {
return 0;
}
#endif
im->tpixels = (int **) malloc (sizeof (int *) * sy);
if (!im->tpixels) {
free(im);
return 0;
}
im->polyInts = 0;
im->polyAllocated = 0;
im->brush = 0;
im->tile = 0;
im->style = 0;
height = sy;
rowptr = buffer;
if (stride < 0) {
int startoff = (height - 1) * stride;
rowptr = buffer - startoff;
}
i = 0;
while (height--) {
im->tpixels[i] = rowptr;
rowptr += stride;
i++;
}
im->sx = sx;
im->sy = sy;
im->transparent = (-1);
im->interlace = 0;
im->trueColor = 1;
im->saveAlphaFlag = 0;
im->alphaBlendingFlag = 1;
im->thick = 1;
im->AA = 0;
im->cx1 = 0;
im->cy1 = 0;
im->cx2 = im->sx - 1;
im->cy2 = im->sy - 1;
return im;
}
bool gdSaveJPEG(void* bits, int width, int height, const char* filename)
{
    bool success = false;
    int stride = ((width * 1 + 3) >> 2) << 2;
    gdImage* im = gdImageTrueColorAttachBuffer((int*)bits, width, height, -stride);
    if (0 != im)
    {
        FILE* jpegout = fopen(filename, "wb");
        if (0 != jpegout)
        {
            gdImageJpeg(im, jpegout, -1);
            fclose(jpegout);
            success = true;
        }
        gdImageDestroy(im);
    }
    return success;
}
I hope it helps.
