Using the WASAPI API to capture voice input through a microphone, but only getting noise

I am new to WASAPI and am following the MSDN reference code here,
http://msdn.microsoft.com/en-us/library/windows/desktop/dd370800(v=vs.85).aspx, to capture audio using the WASAPI APIs.
I modified the MSDN reference code slightly for my purpose. I am using a microphone to record my voice and play it back. This works fine with Sound Recorder and other built-in Windows apps, but with my test application I am not getting any valid sound, just noise.
Here is my code; please let me know where I could be going wrong:
// REFERENCE_TIME time units per second and per millisecond
#define REFTIMES_PER_SEC      10000000
#define REFTIMES_PER_MILLISEC 10000
#define TIME_COUNTER_LIMIT    20

WAVEFORMATEX sinWaveFormat;
CWaveFile sinwave;

HRESULT RecordAudioStream()
{
    HRESULT hr;
    REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
    REFERENCE_TIME hnsActualDuration;
    UINT32 bufferFrameCount;
    UINT32 numFramesAvailable;
    IMMDeviceEnumerator *pEnumerator = NULL;
    IMMDevice *pDevice = NULL;
    IAudioClient *pAudioClient = NULL;
    IAudioCaptureClient *pCaptureClient = NULL;
    WAVEFORMATEX *pwfx = NULL;
    UINT32 packetLength = 0;
    UINT32 time_counter = 0;
    BYTE *pData;
    DWORD flags;
    UINT32 bytesToCapture = 0;
    UINT64 u64DevicePosition = 0;
    UINT64 u64QPCPosition = 0;
    BYTE temp_buffer[10000];

    CoInitializeEx(NULL, COINIT_MULTITHREADED);

    hr = CoCreateInstance(
        __uuidof(MMDeviceEnumerator), NULL,
        CLSCTX_ALL, __uuidof(IMMDeviceEnumerator),
        (void**)&pEnumerator);
    EXIT_ON_ERROR(hr)

    hr = pEnumerator->GetDefaultAudioEndpoint(
        eCapture, eConsole, &pDevice);
    EXIT_ON_ERROR(hr)

    hr = pDevice->Activate(
        __uuidof(IAudioClient), CLSCTX_ALL,
        NULL, (void**)&pAudioClient);
    EXIT_ON_ERROR(hr)

    hr = pAudioClient->GetMixFormat(&pwfx);
    EXIT_ON_ERROR(hr)

    // Convert from float to PCM and from WAVEFORMATEXTENSIBLE to WAVEFORMATEX.
    if ((pwfx->wFormatTag == WAVE_FORMAT_IEEE_FLOAT) ||
        ((pwfx->wFormatTag == WAVE_FORMAT_EXTENSIBLE) &&
         (reinterpret_cast<WAVEFORMATEXTENSIBLE *>(pwfx)->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)))
    {
        pwfx->wFormatTag = WAVE_FORMAT_PCM;
        pwfx->wBitsPerSample = 16;
        pwfx->nBlockAlign = pwfx->nChannels * 2; // (nChannels * wBitsPerSample) / 8
        pwfx->nAvgBytesPerSec = pwfx->nSamplesPerSec * pwfx->nBlockAlign;
        pwfx->cbSize = 0;
    }

    hr = open_capture_file(pwfx);
    EXIT_ON_ERROR(hr)

    hr = pAudioClient->Initialize(
        AUDCLNT_SHAREMODE_SHARED,
        0,
        hnsRequestedDuration,
        0,
        pwfx,
        NULL);
    EXIT_ON_ERROR(hr)

    // Get the size of the allocated buffer.
    hr = pAudioClient->GetBufferSize(&bufferFrameCount);
    EXIT_ON_ERROR(hr)

    hr = pAudioClient->GetService(
        __uuidof(IAudioCaptureClient),
        (void**)&pCaptureClient);
    EXIT_ON_ERROR(hr)

    /*
    // Notify the audio sink which format to use.
    hr = pMySink->SetFormat(pwfx);
    EXIT_ON_ERROR(hr)
    */

    // Calculate the actual duration of the allocated buffer.
    hnsActualDuration = (double)REFTIMES_PER_SEC *
                        bufferFrameCount / pwfx->nSamplesPerSec;

    hr = pAudioClient->Start(); // Start recording.
    EXIT_ON_ERROR(hr)

    // Sleep for half the buffer duration.
    Sleep(hnsActualDuration / REFTIMES_PER_MILLISEC / 2);

    hr = pCaptureClient->GetNextPacketSize(&packetLength);
    EXIT_ON_ERROR(hr)
    bytesToCapture = packetLength * pwfx->nBlockAlign;

    while (packetLength != 0 && time_counter <= TIME_COUNTER_LIMIT)
    {
        time_counter++;
        // Get the available data in the shared buffer.
        hr = pCaptureClient->GetBuffer(
            &pData,
            &numFramesAvailable,
            &flags, &u64DevicePosition, &u64QPCPosition);
        EXIT_ON_ERROR(hr)

        if (packetLength != numFramesAvailable)
        {
            printf("packetlength = %d, numFramesAvailable = %d, does not match.\n", packetLength, numFramesAvailable);
            bytesToCapture = numFramesAvailable * pwfx->nBlockAlign;
        }
        printf("packetlength = %d, numFramesAvailable = %d, bytesToCapture = %d.\n",
               packetLength, numFramesAvailable, bytesToCapture);

        if (flags & AUDCLNT_BUFFERFLAGS_SILENT)
        {
            memset(pData, 0, numFramesAvailable * pwfx->nBlockAlign);
        }
        if (bytesToCapture > sizeof(temp_buffer))
        {
            printf("bytesToCapture = %d, more than buffer size = %d.\n", bytesToCapture, (int)sizeof(temp_buffer));
            // Release the packet before skipping it; otherwise the next
            // GetBuffer call fails with AUDCLNT_E_OUT_OF_ORDER.
            pCaptureClient->ReleaseBuffer(numFramesAvailable);
            continue;
        }
        memcpy(temp_buffer, pData, bytesToCapture);

        hr = pCaptureClient->ReleaseBuffer(numFramesAvailable);
        EXIT_ON_ERROR(hr)

        // Copy the available capture data to the audio sink.
        hr = write_to_file(temp_buffer, bytesToCapture);
        EXIT_ON_ERROR(hr)

        // Sleep for half the buffer duration.
        //Sleep(hnsActualDuration / REFTIMES_PER_MILLISEC / 2);

        hr = pCaptureClient->GetNextPacketSize(&packetLength);
        EXIT_ON_ERROR(hr)
        bytesToCapture = packetLength * pwfx->nBlockAlign;
    }

    hr = pAudioClient->Stop(); // Stop recording.
    EXIT_ON_ERROR(hr)

Exit:
    CoTaskMemFree(pwfx);
    SAFE_RELEASE(pEnumerator);
    SAFE_RELEASE(pDevice);
    SAFE_RELEASE(pAudioClient);
    SAFE_RELEASE(pCaptureClient);
    return hr;
}
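One thing worth double-checking (a diagnostic sketch, not a confirmed fix for the noise): after rewriting pwfx to 16-bit PCM, you can ask the engine whether it actually supports that shared-mode format before calling Initialize, and make sure open_capture_file is given whatever format is finally used, so the file header matches the captured data:

// Sketch: confirm the shared-mode engine accepts the converted PCM format.
WAVEFORMATEX *pClosest = NULL;
hr = pAudioClient->IsFormatSupported(AUDCLNT_SHAREMODE_SHARED, pwfx, &pClosest);
if (hr == S_FALSE && pClosest != NULL)
{
    // The engine proposed a closest match; use it both for Initialize
    // and for open_capture_file so the header matches the data.
    CoTaskMemFree(pwfx);
    pwfx = pClosest;
}
EXIT_ON_ERROR(hr)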

Related

How to capture a certain region of screen

I have to record a certain area of the screen and save the frames (20-30 fps) of the video being played. Currently I am able to capture the screen at the required frame rate using the following code, but I am not really sure how I can capture only a certain region.
HRESULT SavePixelsToFile32bppPBGRA(UINT width, UINT height, UINT stride, BYTE *pixels, LPWSTR filePath, const GUID &format)
{
    if (!filePath || !pixels)
        return E_INVALIDARG;

    HRESULT hr = S_OK;
    IWICImagingFactory *factory = nullptr;
    IWICBitmapEncoder *encoder = nullptr;
    IWICBitmapFrameEncode *frame = nullptr;
    IWICStream *stream = nullptr;
    GUID pf = GUID_WICPixelFormat32bppPBGRA;
    BOOL coInit = CoInitialize(nullptr);

    CoCreateInstance(CLSID_WICImagingFactory, nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&factory));
    factory->CreateStream(&stream);
    stream->InitializeFromFilename(filePath, GENERIC_WRITE);
    factory->CreateEncoder(format, nullptr, &encoder);
    encoder->Initialize(stream, WICBitmapEncoderNoCache);
    encoder->CreateNewFrame(&frame, nullptr); // we don't use options here
    frame->Initialize(nullptr);               // we don't use any options here
    frame->SetSize(width, height);
    frame->SetPixelFormat(&pf);
    frame->WritePixels(height, stride, stride * height, pixels);
    frame->Commit();
    encoder->Commit();

    RELEASE(stream);
    RELEASE(frame);
    RELEASE(encoder);
    RELEASE(factory);
    if (coInit) CoUninitialize();
    return hr;
}
HRESULT Direct3D9TakeScreenshots(UINT adapter, int count)
{
    HRESULT hr = S_OK;
    IDirect3D9 *d3d = nullptr;
    IDirect3DDevice9 *device = nullptr;
    IDirect3DSurface9 *surface = nullptr;
    D3DPRESENT_PARAMETERS parameters = { 0 };
    D3DDISPLAYMODE mode;
    D3DLOCKED_RECT rc;
    UINT pitch;
    SYSTEMTIME st;
    BYTE *shot = nullptr;

    // init D3D and get screen size
    d3d = Direct3DCreate9(D3D_SDK_VERSION);
    d3d->GetAdapterDisplayMode(adapter, &mode);
    parameters.Windowed = TRUE;
    parameters.BackBufferCount = 1;
    parameters.BackBufferHeight = mode.Height;
    parameters.BackBufferWidth = mode.Width;
    parameters.SwapEffect = D3DSWAPEFFECT_DISCARD;
    parameters.hDeviceWindow = NULL;

    // create device & capture surface
    d3d->CreateDevice(adapter, D3DDEVTYPE_HAL, NULL, D3DCREATE_SOFTWARE_VERTEXPROCESSING, &parameters, &device);
    device->CreateOffscreenPlainSurface(mode.Width, mode.Height, D3DFMT_A8R8G8B8, D3DPOOL_SYSTEMMEM, &surface, nullptr);

    // compute the required buffer size
    surface->LockRect(&rc, NULL, 0);
    pitch = rc.Pitch;
    surface->UnlockRect();

    // allocate screenshot buffer
    shot = new BYTE[pitch * mode.Height];

    // get the data
    device->GetFrontBufferData(0, surface);

    // copy it into our buffer
    surface->LockRect(&rc, NULL, 0);
    CopyMemory(shot, rc.pBits, rc.Pitch * mode.Height);
    surface->UnlockRect();

    WCHAR file[100];
    wsprintf(file, L"C:/Frames/cap%i.png", count);
    SavePixelsToFile32bppPBGRA(mode.Width, mode.Height, pitch, shot, file, GUID_ContainerFormatPng);

    if (shot != nullptr)
    {
        delete[] shot; // array delete to match new BYTE[]
    }
    RELEASE(surface);
    RELEASE(device);
    RELEASE(d3d);
    return hr;
}
I know I can open the saved images, crop them, and save them again, but that takes extra time. Is there a way to capture only the desired region?
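One option (a sketch, untested against the code above): IDirect3DSurface9::LockRect accepts an optional RECT, so you may be able to lock just the region you need; alternatively, copy only a sub-rectangle out of the full lock, row by row. The helper below is hypothetical — region is assumed to lie within the display bounds, and the surface format is the same D3DFMT_A8R8G8B8 (4 bytes per pixel) used above:

void CopyRegion(const D3DLOCKED_RECT &rc, const RECT &region, BYTE *dest)
{
    const UINT bytesPerPixel = 4; // D3DFMT_A8R8G8B8
    const UINT rowBytes = (region.right - region.left) * bytesPerPixel;
    for (LONG y = region.top; y < region.bottom; y++)
    {
        // start of this row inside the locked full-screen surface
        const BYTE *src = (const BYTE *)rc.pBits
                        + y * rc.Pitch
                        + region.left * bytesPerPixel;
        CopyMemory(dest + (y - region.top) * rowBytes, src, rowBytes);
    }
}

The cropped buffer can then be passed to SavePixelsToFile32bppPBGRA with the region's width and height, and rowBytes as the stride.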

FFmpeg - Duration of audio file is inaccurate

I have a video file (mp4). I want to extract the audio stream (AAC format) from that file and save it on my PC.
With the code below, the generated .aac file plays in KMPlayer but cannot be played in VLC, and the duration the player displays is wrong.
Please help me with this problem.
err = avformat_open_input(input_format_context, filename, NULL, NULL);
if (err < 0) {
    return err;
}

/* If not enough info to get the stream parameters, we decode the
   first frames to get it. (used in mpeg case for example) */
ret = avformat_find_stream_info(*input_format_context, 0);
if (ret < 0) {
    av_log(NULL, AV_LOG_FATAL, "%s: could not find codec parameters\n", filename);
    return ret;
}

/* dump the file content */
av_dump_format(*input_format_context, 0, filename, 0);

for (size_t i = 0; i < (*input_format_context)->nb_streams; i++) {
    AVStream *st = (*input_format_context)->streams[i];
    if (st->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
        *input_codec_context = st->codec;
        *input_audio_stream = st;

        FILE *file = NULL;
        file = fopen("C:\\Users\\MyPC\\Downloads\\Test.aac", "wb");

        AVPacket reading_packet;
        av_init_packet(&reading_packet);

        while (av_read_frame(*input_format_context, &reading_packet) == 0) {
            if (reading_packet.stream_index == (int) i) {
                uint8_t adts_header[7];
                unsigned int obj_type = 0;
                unsigned int num_data_block = (reading_packet.size) / 1024;
                int rate_idx = st->codec->sample_rate, channels = st->codec->channels;
                uint16_t frame_length;

                // include the header length also
                frame_length = reading_packet.size + 7;

                /* We want the same metadata */
                /* Generate ADTS header */
                if (adts_header == NULL) return -1;

                /* Sync point over a full byte */
                adts_header[0] = 0xFF;
                /* Sync point continued over first 4 bits + static 4 bits
                 * (ID, layer, protection) */
                adts_header[1] = 0xF1;
                /* Object type over first 2 bits */
                adts_header[2] = obj_type << 6;
                /* rate index over next 4 bits */
                adts_header[2] |= (rate_idx << 2);
                /* channels over last 2 bits */
                adts_header[2] |= (channels & 0x4) >> 2;
                /* channels continued over next 2 bits + 4 bits at zero */
                adts_header[3] = (channels & 0x3) << 6;
                /* frame size over last 2 bits */
                adts_header[3] |= (frame_length & 0x1800) >> 11;
                /* frame size continued over full byte */
                adts_header[4] = (frame_length & 0x1FF8) >> 3;
                /* frame size continued first 3 bits */
                adts_header[5] = (frame_length & 0x7) << 5;
                /* buffer fullness (0x7FF for VBR) over 5 last bits */
                adts_header[5] |= 0x1F;
                /* buffer fullness (0x7FF for VBR) continued over 6 first bits + 2 zeros
                 * number of raw data blocks */
                adts_header[6] = 0xFA;
                adts_header[6] |= num_data_block & 0x03; // Set raw data blocks.

                fwrite(adts_header, 1, 7, file);
                fwrite(reading_packet.data, 1, reading_packet.size, file);
            }
            av_free_packet(&reading_packet);
        }
        fclose(file);
        return 0;
    }
}
Object type and sample rate index must be set to the real, correct values. Both values can be parsed out of the AudioSpecificConfig in the extradata field of the codec context. All the information you need is here: http://wiki.multimedia.cx/index.php?title=MPEG-4_Audio
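To make that concrete, here is a minimal sketch (an illustration, assuming the stream's extradata begins with a standard two-byte AudioSpecificConfig) of how the two values could be parsed. Note that the ADTS profile field stores the object type minus one, and that the 4-bit frequency index, not the raw sample rate in Hz, is what belongs in the header:

/* Sketch: parse object type and sample-rate index from the AudioSpecificConfig.
 * Layout: 5 bits audio object type, 4 bits frequency index, 4 bits channel config. */
if (st->codec->extradata && st->codec->extradata_size >= 2) {
    const uint8_t *ex = st->codec->extradata;
    unsigned int obj_type = (ex[0] >> 3) & 0x1F;                  /* 5 bits */
    unsigned int rate_idx = ((ex[0] & 0x07) << 1) | (ex[1] >> 7); /* 4 bits */
    unsigned int channels = (ex[1] >> 3) & 0x0F;                  /* 4 bits */
    /* In the ADTS header: adts_header[2] = ((obj_type - 1) & 0x3) << 6;
     * then OR in (rate_idx << 2) instead of the raw sample rate. */
}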

Capturing latency data with Core Audio

I wrote this code to create an audio file, sine.aif, using the Audio Toolbox framework.
I'm interested in capturing the latency of this file without running it on my phone, just running it on the command line and capturing the time. Is this possible? I do not have much knowledge of Core Audio.
Here is the code:
#import <Foundation/Foundation.h>
#import <AudioToolbox/AudioToolbox.h>

#define SAMPLE_RATE 44100
#define DURATION 5.0
#define FILENAME_FORMAT @"%0.3f-sine.aif"

int main(int argc, const char * argv[]) {
    NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
    if (argc < 2) {
        printf("Usage: CAToneFileGenerator 261.526\n");
        return -1;
    }
    double hz = atof(argv[1]);
    assert(hz > 0);
    NSLog(@"generating %f hz tone", hz);

    NSString *fileName = [NSString stringWithFormat:FILENAME_FORMAT, hz];
    NSString *filePath = [[[NSFileManager defaultManager] currentDirectoryPath] stringByAppendingPathComponent:fileName];
    NSURL *fileURL = [NSURL fileURLWithPath:filePath];

    // Prepare the format
    AudioStreamBasicDescription asbd;
    memset(&asbd, 0, sizeof(asbd));
    asbd.mSampleRate = SAMPLE_RATE;
    asbd.mFormatID = kAudioFormatLinearPCM;
    asbd.mFormatFlags = kAudioFormatFlagIsBigEndian | kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked;
    asbd.mBitsPerChannel = 16;
    asbd.mChannelsPerFrame = 1;
    asbd.mFramesPerPacket = 1;
    asbd.mBytesPerFrame = 2;
    asbd.mBytesPerPacket = 2;

    // Set up the file
    AudioFileID audioFile;
    OSStatus audioErr = noErr;
    audioErr = AudioFileCreateWithURL((CFURLRef)fileURL, kAudioFileAIFFType, &asbd, kAudioFileFlags_EraseFile, &audioFile);
    assert(audioErr == noErr);

    // Start writing samples
    long maxSampleCount = SAMPLE_RATE * DURATION;
    //NSLog(@"maxSampleCount %ld", maxSampleCount);
    long sampleCount = 0;
    UInt32 bytesToWrite = 2;
    double wavelengthInSamples = SAMPLE_RATE / hz;
    NSLog(@"wavelengthInSamples %f", wavelengthInSamples);

    while (sampleCount < maxSampleCount) {
        for (int i = 0; i < wavelengthInSamples; i++) {
            // sine wave
            SInt16 sample = CFSwapInt16HostToBig((SInt16)(SHRT_MAX * sin(2 * M_PI * (i / wavelengthInSamples))));
            audioErr = AudioFileWriteBytes(audioFile, false, sampleCount * 2, &bytesToWrite, &sample);
            assert(audioErr == noErr);
            sampleCount++;
        }
    }
    audioErr = AudioFileClose(audioFile);
    assert(audioErr == noErr);
    NSLog(@"wrote %ld samples", sampleCount);

    [pool drain];
    return 0;
}
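True playback latency cannot be measured without actually playing the file, but if "capturing the time" just means timing the generation step from the command line, one minimal approach (my reading of the question, using the CoreFoundation clock) is to bracket the write loop with CFAbsoluteTimeGetCurrent():

CFAbsoluteTime start = CFAbsoluteTimeGetCurrent();
while (sampleCount < maxSampleCount) {
    // ... the AudioFileWriteBytes loop from above ...
}
CFAbsoluteTime elapsed = CFAbsoluteTimeGetCurrent() - start;
printf("wrote %ld samples in %f seconds\n", sampleCount, elapsed);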

Encoding DirectShow frame buffers using libavcodec

I am trying to encode a stream of frames grabbed by ISampleGrabber (DirectShow) using libavcodec. After encoding the frames I write them to a file, but on completion the file contains only green frames.
Here is the code for grabbing the frames and encoding them:
void DSGrabberCallback::initFFMpeg(){
    const char* filename = "G:/test1.mpg";
    avcodec_register_all();
    printf("Encode video file %s\n", filename);

    AVCodecID codec_id = AV_CODEC_ID_MPEG2VIDEO;
    codec = avcodec_find_encoder(codec_id);
    c = avcodec_alloc_context3(codec);
    if (!c) {
        fprintf(stderr, "Could not allocate video codec context\n");
    }
    c->bit_rate = 4000000;
    c->width = 320;
    c->height = 240;
    AVRational test;
    test.den = 25;
    test.num = 1;
    c->time_base = test;
    c->gop_size = 10;
    //c->max_b_frames = 1;
    c->pix_fmt = AV_PIX_FMT_YUV420P;
    if (codec_id == AV_CODEC_ID_H264)
        av_opt_set(c->priv_data, "preset", "slow", 0);
    if (avcodec_open2(c, codec, NULL) < 0) {
        fprintf(stderr, "Could not open codec\n");
    }
    f = fopen(filename, "wb");
    if (!f) {
        fprintf(stderr, "Could not open %s\n", filename);
    }
    picture = alloc_picture(c->pix_fmt, c->width, c->height);
    /*picture->format = c->pix_fmt;
    picture->width = c->width;
    picture->height = c->height;*/
    av_init_packet(&pkt);
}
void DSGrabberCallback::encodeFrame(unsigned char *frame, ULONG size){
    std::cout << "called.....";
    pkt.data = NULL;
    pkt.size = 0;
    // Only plane 0 (Y) is assigned here; the U and V planes of the
    // YUV420P picture are never filled from the grabbed buffer.
    picture->data[0] = frame;
    fflush(stdout);
    picture->pts = counter;
    ret = avcodec_encode_video2(c, &pkt, picture, &got_output);
    if (ret < 0) {
        fprintf(stderr, "Error encoding frame\n");
    }
    if (got_output) {
        printf("Write frame %3d (size=%5d)\n", counter, pkt.size);
        fwrite(pkt.data, 1, pkt.size, f);
        av_free_packet(&pkt);
    }
}
STDMETHODIMP DSGrabberCallback::SampleCB(double time, IMediaSample* sample)
{
    BYTE* data = NULL;
    ULONG length = 0;
    m_bytes = NULL;
    counter = counter + 1;
    if (FAILED(sample->GetPointer(&data)))
    {
        return E_FAIL;
    }
    length = sample->GetActualDataLength();
    if (length == 0)
    {
        return S_OK;
    }
    if (!m_bytes || m_bytesLength < length)
    {
        if (m_bytes)
        {
            delete[] m_bytes;
        }
        m_bytes = new unsigned char[length];
        m_bytesLength = length;
    }
    if (true)
    {
        for (size_t row = 0; row < 480; row++)
        {
            memcpy((m_bytes + row * 640 * 2), data + (480 - 1 - row) * 640 * 2,
                   640 * 2);
        }
    }
    std::cout << "hiiiiiiiiiiiiiiiiiiiiiiii";
    // memcpy(m_bytes, data, length);
    // std::cout << "called............... " << m_bytes << "\n";
    if (counter < 500){
        encodeFrame(m_bytes, length);
    } else {
        fwrite(endcode, 1, sizeof(endcode), f);
        fclose(f);
        avcodec_close(c);
        av_free(c);
        av_freep(&picture->data[0]);
        avcodec_free_frame(&picture);
        printf("\n");
        exit(1);
    }
    //rtp.sendRTP(data, length);
    //sample->Release();
    //printf("Sample received: %p %u\n", data, length);
    return S_OK;
}
Can anyone tell me where the problem is?
It is working fine now. I had forgotten to convert the image buffer into YUV420P format. I added some code to scale the buffer into YUV format and everything is fine now. Thank you Wimmel and Roman R.
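For anyone hitting the same green-frame symptom, here is a rough sketch of the missing conversion step using libswscale (hypothetical: it assumes the grabber delivers 640x480 YUY2, which the 640 * 2 row stride in SampleCB suggests, and that picture owns its own YUV420P planes as allocated by alloc_picture):

#include <libswscale/swscale.h>

// Convert the grabbed YUY2 buffer into the encoder's YUV420P picture.
struct SwsContext *sws = sws_getContext(640, 480, AV_PIX_FMT_YUYV422,
                                        c->width, c->height, AV_PIX_FMT_YUV420P,
                                        SWS_BILINEAR, NULL, NULL, NULL);
const uint8_t *srcData[1] = { m_bytes };
int srcStride[1] = { 640 * 2 }; // bytes per source row
sws_scale(sws, srcData, srcStride, 0, 480, picture->data, picture->linesize);
sws_freeContext(sws); // in real code, create the context once and reuse it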

GetOutputAvailableType() returns MF_E_TRANSFORM_TYPE_NOT_SET

I am trying to decode H.264 video from my camera, and I have configured the decoder as follows.
I am not able to figure out why GetOutputAvailableType() returns MF_E_TRANSFORM_TYPE_NOT_SET. I have also noticed that GetStreamIDs() returns E_NOTIMPL; MSDN says this means the "MFT has a fixed number of streams, and the stream identifiers are consecutive starting from zero." Can somebody explain what this means?
HRESULT CH264_decodingDlg::ConfugureDecoder(IMFTransform *pDecoder)
{
    HRESULT hr = S_OK;
    IMFMediaType* pMediaTypeInput = NULL;

    // Creating Input Media Type
    hr = MFCreateMediaType(&pMediaTypeInput);
    if (FAILED(hr)) return hr;

    DWORD dwIn = 0, dwOut = 0;
    hr = pDecoder->GetStreamCount(&dwIn, &dwOut);
    if (FAILED(hr)) return hr;

    if (dwIn)
    {
        DWORD dwInputID[2] = {0};
        DWORD dwOutputID[2] = {0};
        hr = pDecoder->GetStreamIDs(dwIn, dwInputID, dwOut, dwOutputID);
        //if (FAILED(hr)) return hr;

        GUID guidCurrent;
        GUID guidMajor;
        for (int i = 0; i < 8; i++)
        {
            hr = pDecoder->GetInputAvailableType(0, i, &pMediaTypeInput);
            hr = pMediaTypeInput->GetGUID(MF_MT_MAJOR_TYPE, &guidMajor);
            if (guidMajor == MFMediaType_Video) // safety check
            {
                if (hr == MF_E_NO_MORE_TYPES)
                {
                    break;
                }
                hr = pMediaTypeInput->GetGUID(MF_MT_SUBTYPE, &guidCurrent);
                if (guidCurrent == MFVideoFormat_H264) // m_pVideoSubtype = MEDIASUBTYPE_WVC1
                {
                    break;
                }
            }
        }

        // Set MF_MT_MAJOR_TYPE
        hr = pMediaTypeInput->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
        if (FAILED(hr)) return hr;

        // Set MF_MT_SUBTYPE
        hr = pMediaTypeInput->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264);
        if (FAILED(hr)) return hr;

        // Set MF_MT_FRAME_RATE
        UINT32 nNumerator = 30;
        UINT32 nDenominator = 1;
        hr = MFSetAttributeRatio(pMediaTypeInput, MF_MT_FRAME_RATE, nNumerator, nDenominator);
        if (FAILED(hr)) return hr;

        // Set MF_MT_FRAME_SIZE
        UINT32 nWidth = 640;
        UINT32 nHeight = 480;
        hr = MFSetAttributeSize(pMediaTypeInput, MF_MT_FRAME_SIZE, nWidth, nHeight);
        if (FAILED(hr)) return hr;

        // Set MF_MT_MPEG2_PROFILE
        hr = pMediaTypeInput->SetUINT32(MF_MT_MPEG2_PROFILE, eAVEncH264VProfile_Base);
        if (FAILED(hr)) return hr;

        // Set MF_MT_INTERLACE_MODE
        hr = pMediaTypeInput->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
        if (FAILED(hr)) return hr;

        // Set MF_MT_PIXEL_ASPECT_RATIO
        hr = MFSetAttributeRatio(pMediaTypeInput, MF_MT_PIXEL_ASPECT_RATIO, 1, 1);
        if (FAILED(hr)) return hr;

        // Set Input Media Type
        hr = pDecoder->SetInputType(0, pMediaTypeInput, MFT_SET_TYPE_TEST_ONLY);
        if (FAILED(hr)) return hr;

        // Creating Output Media Type
        IMFMediaType* pMediaTypeOutput = NULL;
        hr = MFCreateMediaType(&pMediaTypeOutput);
        if (FAILED(hr)) return hr;

        hr = pDecoder->GetOutputAvailableType(0, 0, &pMediaTypeOutput);
        if (FAILED(hr)) return hr;
Before you can get an output media type, you have to set the input media type. I don't see you doing that: SetInputType is called with MFT_SET_TYPE_TEST_ONLY, which only tests whether the type is acceptable without actually setting it.
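A minimal sketch of the fix that answer implies, replacing the test-only call in the question's code:

// Actually set the input type (flags = 0) rather than only testing it;
// once an input type is set, GetOutputAvailableType can enumerate outputs.
hr = pDecoder->SetInputType(0, pMediaTypeInput, 0);
if (FAILED(hr)) return hr;

hr = pDecoder->GetOutputAvailableType(0, 0, &pMediaTypeOutput);
if (FAILED(hr)) return hr;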
