I've been struggling with a resource leak seemingly caused by NVIDIA's h.264 encoder MFT. Each time a frame is submitted to the encoder, the reference count of my D3D device is incremented by 1, and this reference is not given up even after shutting down the MFT. A bunch of threads are leaked as well.
I'm almost ready to bring this up with NVIDIA, but I'd like to first make sure there's nothing obvious I have missed. Please see my implementation below - I've tried to keep it as concise and clear as possible.
Arguments for why this might be a problem with NVIDIA's encoder:
This only happens with NVIDIA's encoder. No leak is observed when running on e.g. Intel's QuickSync.
Arguments for why this might be a problem in my code:
I've tried using a SinkWriter to write DXGI surfaces to a file in a similar fashion, and here the leak is not present. Unfortunately I don't have access to the source code of SinkWriter. I would be very happy if anyone could point me to some working sample code that I could compare against.
#pragma comment(lib, "D3D11.lib")
#pragma comment(lib, "mfplat.lib")
#pragma comment(lib, "mf.lib")
#pragma comment(lib, "evr.lib")
#pragma comment(lib, "mfuuid.lib")
#pragma comment(lib, "Winmm.lib")
// std
#include <iostream>
#include <string>
// Windows
#include <windows.h>
#include <atlbase.h>
// DirectX
#include <d3d11.h>
// Media Foundation
#include <mfapi.h>
#include <mfplay.h>
#include <mfreadwrite.h>
#include <mferror.h>
#include <Codecapi.h>
// Error handling
// CHECK(x): if `x` evaluates to false, prints the source location and the
// failing expression, then throws std::exception.
// The body is wrapped in do { } while (0) so that `CHECK(x);` is exactly one
// statement and composes correctly with an un-braced if/else at the call site.
#define CHECK(x) do { if (!(x)) { printf("%s(%d) %s was false\n", __FILE__, __LINE__, #x); throw std::exception(); } } while (0)
// CHECK_HR(x): evaluates `x` exactly once as an HRESULT; if it is a failure
// code, prints the source location, the expression and the code in hex, then
// throws std::exception. The code is cast to unsigned to match the %x format.
#define CHECK_HR(x) do { HRESULT hr_ = (x); if (FAILED(hr_)) { printf("%s(%d) %s failed with 0x%x\n", __FILE__, __LINE__, #x, static_cast<unsigned int>(hr_)); throw std::exception(); } } while (0)
// Constants
// Frame width in pixels, used for MF_MT_FRAME_SIZE on both media types.
constexpr UINT ENCODE_WIDTH = 1920;
// Frame height in pixels, used for MF_MT_FRAME_SIZE on both media types.
constexpr UINT ENCODE_HEIGHT = 1080;
// Number of iterations of the event loop in runEncode(); each iteration
// services one transform event (input request or output notification).
constexpr UINT ENCODE_FRAMES = 120;
// Forward declaration; the definition follows main().
void runEncode();
int main()
{
CHECK_HR(CoInitializeEx(NULL, COINIT_APARTMENTTHREADED));
CHECK_HR(MFStartup(MF_VERSION));
for (;;)
{
runEncode();
if (getchar() == 'q')
break;
}
CHECK_HR(MFShutdown());
return 0;
}
// Runs one complete encode session:
//   1. creates a D3D11 device and a DXGI device manager wrapping it,
//   2. activates the first hardware H.264 encoder MFT enumerated for the
//      device's adapter and unlocks it for asynchronous use,
//   3. configures it for NV12 input / H.264 output at
//      ENCODE_WIDTH x ENCODE_HEIGHT,
//   4. services ENCODE_FRAMES transform events, feeding blank allocator
//      samples on input requests and printing the size of each output,
//   5. signals end of stream and shuts the transform down.
// Throws std::exception (via CHECK / CHECK_HR) on any failure.
void runEncode()
{
    CComPtr<ID3D11Device> device;
    CComPtr<ID3D11DeviceContext> context;
    CComPtr<IMFDXGIDeviceManager> deviceManager;
    CComPtr<IMFVideoSampleAllocatorEx> allocator;
    CComPtr<IMFTransform> transform;
    CComPtr<IMFAttributes> transformAttrs;
    CComQIPtr<IMFMediaEventGenerator> eventGen;
    DWORD inputStreamID;
    DWORD outputStreamID;
    // ------------------------------------------------------------------------
    // Initialize D3D11
    // ------------------------------------------------------------------------
    CHECK_HR(D3D11CreateDevice(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, D3D11_CREATE_DEVICE_VIDEO_SUPPORT | D3D11_CREATE_DEVICE_DEBUG, NULL, 0, D3D11_SDK_VERSION, &device, NULL, &context));
    {
        // Probably not necessary in this application, but maybe the MFT requires it?
        CComQIPtr<ID3D10Multithread> mt(device);
        CHECK(mt);
        mt->SetMultithreadProtected(TRUE);
    }
    // Create device manager
    UINT resetToken;
    CHECK_HR(MFCreateDXGIDeviceManager(&resetToken, &deviceManager));
    CHECK_HR(deviceManager->ResetDevice(device, resetToken));
    // ------------------------------------------------------------------------
    // Initialize hardware encoder MFT
    // ------------------------------------------------------------------------
    {
        // Find the encoder
        CComHeapPtr<IMFActivate*> activateRaw;
        UINT32 activateCount = 0;
        // Input & output types
        MFT_REGISTER_TYPE_INFO inInfo = { MFMediaType_Video, MFVideoFormat_NV12 };
        MFT_REGISTER_TYPE_INFO outInfo = { MFMediaType_Video, MFVideoFormat_H264 };
        // Query for the adapter LUID to get a matching encoder for the device.
        CComQIPtr<IDXGIDevice> dxgiDevice(device);
        CHECK(dxgiDevice);
        CComPtr<IDXGIAdapter> adapter;
        CHECK_HR(dxgiDevice->GetAdapter(&adapter));
        DXGI_ADAPTER_DESC adapterDesc;
        CHECK_HR(adapter->GetDesc(&adapterDesc));
        CComPtr<IMFAttributes> enumAttrs;
        CHECK_HR(MFCreateAttributes(&enumAttrs, 1));
        CHECK_HR(enumAttrs->SetBlob(MFT_ENUM_ADAPTER_LUID, (BYTE*)&adapterDesc.AdapterLuid, sizeof(LUID)));
        CHECK_HR(MFTEnum2(MFT_CATEGORY_VIDEO_ENCODER, MFT_ENUM_FLAG_HARDWARE | MFT_ENUM_FLAG_SORTANDFILTER, &inInfo, &outInfo, enumAttrs, &activateRaw, &activateCount));
        CHECK(activateCount != 0);
        // Choose the first returned encoder. The CComPtr takes its own
        // reference, so the raw array entries can all be released below.
        CComPtr<IMFActivate> activate = activateRaw[0];
        // Memory management: release every enumerated activation object; the
        // array itself is freed by CComHeapPtr.
        for (UINT32 i = 0; i < activateCount; i++)
            activateRaw[i]->Release();
        // Activate
        CHECK_HR(activate->ActivateObject(IID_PPV_ARGS(&transform)));
        // Get attributes
        CHECK_HR(transform->GetAttributes(&transformAttrs));
    }
    // ------------------------------------------------------------------------
    // Query encoder name (not necessary, but nice) and unlock for async use
    // ------------------------------------------------------------------------
    {
        UINT32 nameLength = 0;
        std::wstring name;
        CHECK_HR(transformAttrs->GetStringLength(MFT_FRIENDLY_NAME_Attribute, &nameLength));
        // IMFAttributes::GetString returns a null-terminated wide string, so
        // reserve one extra character and trim it again afterwards.
        name.resize((size_t)nameLength + 1);
        CHECK_HR(transformAttrs->GetString(MFT_FRIENDLY_NAME_Attribute, &name[0], (UINT32)name.size(), &nameLength));
        name.resize(nameLength);
        printf("Using %ls\n", name.c_str());
        // Unlock the transform for async use and get event generator
        CHECK_HR(transformAttrs->SetUINT32(MF_TRANSFORM_ASYNC_UNLOCK, TRUE));
        CHECK(eventGen = transform);
    }
    // Get stream IDs (expect 1 input and 1 output stream)
    {
        HRESULT hr = transform->GetStreamIDs(1, &inputStreamID, 1, &outputStreamID);
        if (hr == E_NOTIMPL)
        {
            // The transform does not report IDs; fall back to stream 0.
            inputStreamID = 0;
            outputStreamID = 0;
            hr = S_OK;
        }
        CHECK_HR(hr);
    }
    // ------------------------------------------------------------------------
    // Configure hardware encoder MFT
    // ------------------------------------------------------------------------
    // Set D3D manager
    CHECK_HR(transform->ProcessMessage(MFT_MESSAGE_SET_D3D_MANAGER, reinterpret_cast<ULONG_PTR>(deviceManager.p)));
    // Set output type (set before the input type, which is then taken from
    // the transform's list of available input types)
    CComPtr<IMFMediaType> outputType;
    CHECK_HR(MFCreateMediaType(&outputType));
    CHECK_HR(outputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video));
    CHECK_HR(outputType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264));
    CHECK_HR(outputType->SetUINT32(MF_MT_AVG_BITRATE, 30000000));
    CHECK_HR(MFSetAttributeSize(outputType, MF_MT_FRAME_SIZE, ENCODE_WIDTH, ENCODE_HEIGHT));
    CHECK_HR(MFSetAttributeRatio(outputType, MF_MT_FRAME_RATE, 60, 1));
    // 2 == MFVideoInterlace_Progressive
    CHECK_HR(outputType->SetUINT32(MF_MT_INTERLACE_MODE, 2));
    CHECK_HR(outputType->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, TRUE));
    CHECK_HR(transform->SetOutputType(outputStreamID, outputType, 0));
    // Set input type
    CComPtr<IMFMediaType> inputType;
    CHECK_HR(transform->GetInputAvailableType(inputStreamID, 0, &inputType));
    CHECK_HR(inputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video));
    CHECK_HR(inputType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_NV12));
    CHECK_HR(MFSetAttributeSize(inputType, MF_MT_FRAME_SIZE, ENCODE_WIDTH, ENCODE_HEIGHT));
    CHECK_HR(MFSetAttributeRatio(inputType, MF_MT_FRAME_RATE, 60, 1));
    CHECK_HR(transform->SetInputType(inputStreamID, inputType, 0));
    // ------------------------------------------------------------------------
    // Create sample allocator
    // ------------------------------------------------------------------------
    {
        // Check the HRESULTs of both factory calls: a failure would otherwise
        // surface later as a null-pointer dereference.
        CHECK_HR(MFCreateVideoSampleAllocatorEx(IID_PPV_ARGS(&allocator)));
        CHECK(allocator);
        CComPtr<IMFAttributes> allocAttrs;
        CHECK_HR(MFCreateAttributes(&allocAttrs, 2));
        CHECK_HR(allocAttrs->SetUINT32(MF_SA_D3D11_BINDFLAGS, D3D11_BIND_RENDER_TARGET));
        CHECK_HR(allocAttrs->SetUINT32(MF_SA_D3D11_USAGE, D3D11_USAGE_DEFAULT));
        CHECK_HR(allocator->SetDirectXManager(deviceManager));
        CHECK_HR(allocator->InitializeSampleAllocatorEx(1, 2, allocAttrs, inputType));
    }
    // ------------------------------------------------------------------------
    // Start encoding
    // ------------------------------------------------------------------------
    CHECK_HR(transform->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, NULL));
    CHECK_HR(transform->ProcessMessage(MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, NULL));
    CHECK_HR(transform->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, NULL));
    // Encode loop: one transform event is serviced per iteration. The index
    // is unsigned to match ENCODE_FRAMES.
    for (UINT i = 0; i < ENCODE_FRAMES; i++)
    {
        // Get next event (flags == 0: block until one is available)
        CComPtr<IMFMediaEvent> event;
        CHECK_HR(eventGen->GetEvent(0, &event));
        MediaEventType eventType;
        CHECK_HR(event->GetType(&eventType));
        switch (eventType)
        {
        case METransformNeedInput:
        {
            CComPtr<IMFSample> sample;
            CHECK_HR(allocator->AllocateSample(&sample));
            CHECK_HR(transform->ProcessInput(inputStreamID, sample, 0));
            // Dereferencing the device once after feeding each frame "fixes" the leak.
            //device.p->Release();
            break;
        }
        case METransformHaveOutput:
        {
            DWORD status = 0;
            MFT_OUTPUT_DATA_BUFFER outputBuffer = {};
            outputBuffer.dwStreamID = outputStreamID;
            const HRESULT hrOutput = transform->ProcessOutput(0, 1, &outputBuffer, &status);
            // Take ownership of whatever the transform handed back *before*
            // any check can throw, so the sample and the event collection are
            // released on every path (Attach does not add a reference).
            CComPtr<IMFSample> outSample;
            outSample.Attach(outputBuffer.pSample);
            CComPtr<IMFCollection> outEvents;
            outEvents.Attach(outputBuffer.pEvents);
            CHECK_HR(hrOutput);
            // Guard against a successful call that produced no sample.
            CHECK(outSample);
            DWORD bufCount = 0;
            DWORD bufLength = 0;
            CHECK_HR(outSample->GetBufferCount(&bufCount));
            CComPtr<IMFMediaBuffer> outBuffer;
            CHECK_HR(outSample->GetBufferByIndex(0, &outBuffer));
            CHECK_HR(outBuffer->GetCurrentLength(&bufLength));
            printf("METransformHaveOutput buffers=%d, bytes=%d\n", bufCount, bufLength);
            // The sample is not processed further; the smart pointers release
            // it (and the events) at the end of this scope.
            break;
        }
        default:
            // Other event types are ignored but still count as an iteration.
            break;
        }
    }
    // ------------------------------------------------------------------------
    // Finish encoding
    // ------------------------------------------------------------------------
    // NOTE(review): COMMAND_DRAIN is sent after NOTIFY_END_STREAMING and no
    // drain-complete event is awaited afterwards; confirm this ordering
    // against the asynchronous MFT documentation.
    CHECK_HR(transform->ProcessMessage(MFT_MESSAGE_NOTIFY_END_OF_STREAM, NULL));
    CHECK_HR(transform->ProcessMessage(MFT_MESSAGE_NOTIFY_END_STREAMING, NULL));
    CHECK_HR(transform->ProcessMessage(MFT_MESSAGE_COMMAND_DRAIN, NULL));
    // Shutdown
    printf("Finished encoding\n");
    // I've tried all kinds of things...
    //CHECK_HR(transform->ProcessMessage(MFT_MESSAGE_SET_D3D_MANAGER, reinterpret_cast<ULONG_PTR>(nullptr)));
    //transform->SetInputType(inputStreamID, NULL, 0);
    //transform->SetOutputType(outputStreamID, NULL, 0);
    //transform->DeleteInputStream(inputStreamID);
    //deviceManager->ResetDevice(NULL, resetToken);
    CHECK_HR(MFShutdownObject(transform));
}
I think the answer is “yes”.
I saw the problem before: Is it possible to shut down a D3D device?
As a workaround, I stopped re-creating D3D devices. Instead, I use a global CAtlMap collection. The keys are uint64_t values containing the LUID of the GPU, taken from the DXGI_ADAPTER_DESC::AdapterLuid field. The values are structures with two fields: a CComPtr<ID3D11Device> and a CComPtr<IMFDXGIDeviceManager>.
I was trying to enumerate the available devices on a Windows machine using this code example from Microsoft, but I get link errors that refer to _main().
#include <stdio.h>
#include <tchar.h>
#include <windows.h>
#include <setupapi.h>
#include <devguid.h>
#include <regstr.h>
// The SetupDi* functions live in setupapi.dll; link against its import library.
#pragma comment(lib, "setupapi.lib")
int main( int argc, char *argv[ ], char *envp[ ] )
{
HDEVINFO hDevInfo;
SP_DEVINFO_DATA DeviceInfoData;
DWORD i;
// Create a HDEVINFO with all present devices.
hDevInfo = SetupDiGetClassDevs(NULL,
0, // Enumerator
0,
DIGCF_PRESENT | DIGCF_ALLCLASSES );
if (hDevInfo == INVALID_HANDLE_VALUE)
{
// Insert error handling here.
return 1;
}
// Enumerate through all devices in Set.
DeviceInfoData.cbSize = sizeof(SP_DEVINFO_DATA);
for (i=0;SetupDiEnumDeviceInfo(hDevInfo,i,
&DeviceInfoData);i++)
{
DWORD DataT;
LPTSTR buffer = NULL;
DWORD buffersize = 0;
//
// Call function with null to begin with,
// then use the returned buffer size (doubled)
// to Alloc the buffer. Keep calling until
// success or an unknown failure.
//
// Double the returned buffersize to correct
// for underlying legacy CM functions that
// return an incorrect buffersize value on
// DBCS/MBCS systems.
//
while (!SetupDiGetDeviceRegistryProperty(
hDevInfo,
&DeviceInfoData,
SPDRP_DEVICEDESC,
&DataT,
(PBYTE)buffer,
buffersize,
&buffersize))
{
if (GetLastError() ==
ERROR_INSUFFICIENT_BUFFER)
{
// Change the buffer size.
if (buffer) LocalFree(buffer);
// Double the size to avoid problems on
// W2k MBCS systems per KB 888609.
buffer = LocalAlloc(LPTR,buffersize * 2);
}
else
{
// Insert error handling here.
break;
}
}
printf("Result:[%s]\n",buffer);
if (buffer) LocalFree(buffer);
}
if ( GetLastError()!=NO_ERROR &&
GetLastError()!=ERROR_NO_MORE_ITEMS )
{
// Insert error handling here.
return 1;
}
// Cleanup
SetupDiDestroyDeviceInfoList(hDevInfo);
return 0;
}
For some reason, there are link errors:
1>device.obj : error LNK2019: unresolved external symbol __imp__SetupDiDestroyDeviceInfoList@4 referenced in function _main
1>device.obj : error LNK2019: unresolved external symbol __imp__SetupDiGetDeviceRegistryPropertyW@28 referenced in function _main
1>device.obj : error LNK2019: unresolved external symbol __imp__SetupDiEnumDeviceInfo@12 referenced in function _main
1>device.obj : error LNK2019: unresolved external symbol __imp__SetupDiGetClassDevsW@16 referenced in function _main
1>c:\users\visual studio 2010\Projects\usb\Debug\usb.exe : fatal error LNK1120: 4 unresolved externals
What are they about? There is no _main() at all.
First things first: the linker errors are not about a missing main, but about other functions that are referenced (called) from main.
The functions you mentioned in linker errors are from library: Setupapi.lib, and you need to include it in Linker settings (Input) of your project.
Why did it work after re-opening VS?
Probably simply because you changed the configuration (by mistake) - for example from Win32 to x64, and/or from Debug to Release, or any combination of these. The other configuration didn't have the reference to this library added.
I am toggling the input into a GPIO line on my BeagleBone from high to low every 500 ms using an Atmel uC. I have registered a handler for this in my Linux Kernel Module, but the handler is not being called for some reason.
My module code is -
/* GPIO number being monitored and the label used when requesting it. */
#define GPIO 54
#define GPIO_INT_NAME "gpio_int"
/* Current pin level as a boolean; fully parenthesized so the macros can be
 * used inside larger expressions. */
#define GPIO_HIGH (gpio_get_value(GPIO) != 0)
#define GPIO_LOW (gpio_get_value(GPIO) == 0)
/* IRQ number mapped from GPIO. Kept as int because gpio_to_irq() returns an
 * int (negative on error) that a short could truncate. */
int irq_any_gpio = 0;
/* Number of times the interrupt handler has run. */
int count = 0;
/* Pin level observed by the most recent interrupt. */
enum { falling, rising } type;
/*
 * Interrupt handler for the monitored GPIO line.
 * Counts the interrupt, logs it, and - when the IRQ is the one mapped from
 * GPIO - records and logs whether the pin currently reads low or high.
 * dev_id is unused. Always returns IRQ_HANDLED.
 */
static irqreturn_t r_irq_handler(int irq, void *dev_id)
{
	count++;
	printk(KERN_DEBUG "interrupt received (irq: %d)\n", irq);

	/* Nothing more to do for an IRQ that is not ours. */
	if (irq != gpio_to_irq(GPIO))
		return IRQ_HANDLED;

	if (GPIO_LOW) {
		type = falling;
		printk("gpio pin is low\n");
	} else {
		type = rising;
		printk("gpio pin is high\n");
	}
	return IRQ_HANDLED;
}
void r_int_config(void) {
if (gpio_request(GPIO, GPIO_INT_NAME ))
{
printk("GPIO request failure: %s\n", GPIO_INT_NAME );
return;
}
if ( (irq_any_gpio = gpio_to_irq(GPIO)) < 0 ) {
printk("GPIO to IRQ mapping failure %s\n",GPIO_INT_NAME );
return;
}
printk(KERN_NOTICE "Mapped int %d\n", irq_any_gpio);
if (request_irq(irq_any_gpio,(irq_handler_t ) r_irq_handler, IRQF_TRIGGER_HIGH, GPIO_INT_NAME, NULL))
{
printk("Irq Request failure\n");
return;
}
return;
}
/*
 * Releases what r_int_config() acquired: frees the IRQ mapped from GPIO
 * (registered with a NULL dev_id) and then frees the GPIO itself.
 */
void r_int_release(void)
{
	const int irq = gpio_to_irq(GPIO);

	free_irq(irq, NULL);
	gpio_free(GPIO);
}
/*
 * Module entry point: logs a greeting and sets up the GPIO interrupt.
 * NOTE(review): r_int_config() returns void, so a failed setup is not
 * reported here and the function always returns 0.
 */
int init_module(void)
{
printk("<1>Hello World\n");
r_int_config();
return 0;
}
On calling insmod interrupt_test.ko, I get the following messages:
[ 76.594543] Hello World
[ 76.597137] Mapped int 214
But now, when I start toggling the input into this GPIO pin, the interrupt handler doesn't get called and the "interrupt received" message is not displayed.
How do I solve this ? What's causing the problem?
IMO your module lacks some basic kernel module programming prerequisites. At first, you have no includes. For toggling GPIO pins use the following.
#include <linux/init.h> // initialization macros
#include <linux/module.h> // dynamic loading of modules into the kernel
#include <linux/kernel.h> // kernel stuff
#include <linux/gpio.h> // GPIO functions/macros
#include <linux/interrupt.h> // interrupt functions/macros
You should use MODULE_LICENSE("Your license"); otherwise the kernel may get tainted. Your initialization and exit functions should also be marked with the __init and __exit macros, like this:
void __init init_module(void) {...}
void __exit r_int_release(void) {...} // exit code should cleanup all stuff
Furthermore the kernel needs to know which functions to call on module load and exit. Therefore use:
module_init(init_module); // Do some better naming
module_exit(r_int_release);
Now to the IRQ. IRQs must be assigned to GPIOs and you must specify an IRQ handler. So what you need to do within your module's init code is:
static unsigned int yourGPIO = 49; // This is Pin 23 on the P9 header
static unsigned int irqNumber;
static irq_handler_t irqHandler(unsigned int irq, void *dev_id, struct pt_regs *regs);
// This is the IRQ Handler prototype
// Module init: claims yourGPIO, configures it as a debounced input, exports
// it to sysfs, maps it to an IRQ and registers irqHandler for rising edges.
// Returns the result of request_irq().
// NOTE(review): the return values of gpio_request(), gpio_direction_input(),
// gpio_set_debounce(), gpio_export() and gpio_to_irq() are not checked here.
// The "..." lines and the sentence between them are elisions from the original
// answer, not code.
static int __init init_module(void)
{
int result = 0;
gpio_request(yourGPIO, "fancy label"); // Request a GPIO pin from the driver
// 'yourGPIO' is expected to be an unsigned int, i.e. the GPIO number
gpio_direction_input(yourGPIO); // Set GPIO as input
gpio_set_debounce(yourGPIO, 50); // Set a 50ms debounce, adjust to your needs
gpio_export(yourGPIO); // The GPIO will appear in /sys/class/gpio
...
Without a GPIO pin requested from the driver, the following will fail
...
irqNumber = gpio_to_irq(yourGPIO); // map your GPIO to an IRQ
result = request_irq(irqNumber, // requested interrupt
(irq_handler_t) irqHandler, // pointer to handler function
IRQF_TRIGGER_RISING, // interrupt mode flag
"irqHandler", // used in /proc/interrupts
NULL); // the *dev_id shared interrupt lines, NULL is okay
return result;
}
Last but not least implement your handler function and load the module.
For more reference, I recommend reading Derek Molloy's blog at derekmolloy.ie.
My question is: why does VC emit warning 4365 for only one of the commented lines below, and not the other?
// Minimal reproduction for the C4365 question: both calls pass an `int`
// expression (the operands of + are promoted) to a `const unsigned short`
// parameter, yet only the second one is diagnosed.
// Report C4365 (signed/unsigned mismatch) as a level-1 warning.
#pragma warning(1: 4365)
// Sink taking an unsigned short by value; the argument is deliberately unused.
void test1(const unsigned short) {}
// Source of an unsigned short whose value comes from a function call.
unsigned short test2() { return 0; }
int main()
{
const unsigned short a = 0;
const unsigned short b = 0;
// NOTE(review): presumably `a + b` is folded to a compile-time constant that
// fits the parameter type, whereas `test2() + b` is not - confirm against the
// compiler documentation.
test1(a + b); // This line gives no warning
test1(test2() + b); // This line gives C4365
return 0;
}
Tested under VS2010 and VS2012 Express.
For reference, the full warning text is this:
warning C4365: 'argument' : conversion from 'int' to 'const unsigned short', signed/unsigned mismatch
Using Clang 3.3 (through Clang-Win32 and ClangVSx), no warnings are reported in this code (except of course the unknown pragma).
I am currently working on a project using Network Simulator 2. When I add a variable inside the structure re_block, the program compiles but gives me a segmentation fault at runtime. When I declare the variable as static, there is no runtime error. Could someone please explain this?
// One routing-element block. The first 16 bits are packed bit-fields
// (g:1 + prefix:7 + res:2 + re_hopcnt:6), declared in a different order for
// big- and little-endian targets, followed by two 32-bit fields.
// NOTE(review): adding members changes sizeof(struct re_block) and therefore
// the stride of any array of these blocks; check every place that relies on
// the current layout before extending the struct.
struct re_block {
# if __BYTE_ORDER == __BIG_ENDIAN
u_int16_t g : 1;
u_int16_t prefix : 7;
u_int16_t res : 2;
u_int16_t re_hopcnt : 6;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
u_int16_t res : 2;
u_int16_t re_hopcnt : 6;
u_int16_t g : 1;
u_int16_t prefix : 7;
# else
# error "Adjust your <bits/endian.h> defines"
# endif
// Node address and sequence number carried by this block.
u_int32_t re_node_addr;
u_int32_t re_node_seqnum;
};
// NOTE(review): MAX_RE_BLOCKS is defined with no replacement text, so
// re_blocks[MAX_RE_BLOCKS] below expands to re_blocks[] (an array with no
// bound). The intended count appears to be missing - confirm against the
// original header.
#define MAX_RE_BLOCKS
// RE message: a 32-bit word of packed header bit-fields
// (1 + 2 + 5 + 12 + 6 + 1 + 1 + 1 + 3 bits), the target address and sequence
// number, an 8-bit hop-count byte (6 + 2 bits), then the re_block array.
typedef struct {
u_int32_t m : 1;
u_int32_t h : 2;
u_int32_t type : 5;
u_int32_t len : 12;
u_int32_t ttl : 6;
u_int32_t i : 1;
u_int32_t a : 1;
u_int32_t s : 1;
u_int32_t res1 : 3;
u_int32_t target_addr;
u_int32_t target_seqnum;
u_int8_t thopcnt : 6;
u_int8_t res2 : 2;
// Blocks are stored inline, so their size determines where each one starts.
struct re_block re_blocks[MAX_RE_BLOCKS];
} RE;
I want to add two float variables in struct re_block. Please help
Using a memory debugging tool such as valgrind, can you find the place in the code where the segfault occurs? My guess would be that there is some runtime code that takes advantage of the data layout of the re_block struct, for example by casting a pointer to a re_block instance to type (u_int16_t *) and dereferencing as a means of getting the first member, rather than using the operator ->. Adding members to the struct can change the layout of the data, so code that uses such tricks may break.