Related
I have written a program to scan kernel memory for a pattern from user space. I run it from root. I expect that it will generate SIGSEGVs when it hits pages that aren't accessible; I would like to ignore those faults and just jump to the next page to continue the search. I have set up a signal handler that works fine for the first occurrence, and it continues onward as expected. However, when a second SIGSEGV occurs, the handler is ignored (it was reregistered after the first occurrence) and the program terminates. The relevant portions of the code are:
/*
 * Jump target used to recover from a SIGSEGV raised while probing the mapped
 * kernel memory.  It is a sigjmp_buf (rather than a plain jmp_buf) so that
 * the signal mask saved by sigsetjmp() is put back by siglongjmp().
 */
sigjmp_buf restore_point;

/*
 * SIGSEGV handler: abandon the faulting access and resume at restore_point.
 *
 * SIGSEGV is blocked while this handler runs.  Leaving through plain
 * longjmp() does not unblock it, so a second fault would terminate the
 * process instead of reaching the handler; siglongjmp() restores the mask
 * that sigsetjmp() saved.
 */
void segv_handler(int sig, siginfo_t* info, void* ucontext)
{
    (void)sig;
    (void)info;
    (void)ucontext;
    siglongjmp(restore_point, SIGSEGV);
}

/*
 * Install segv_handler for SIGSEGV.
 *
 * SA_RESETHAND is deliberately not set: the handler has to stay installed
 * for every fault met during a scan, not only the first one.
 */
void setup_segv_handler()
{
    struct sigaction sa = {0};

    sa.sa_flags = SA_SIGINFO | SA_RESTART;
    sigemptyset(&sa.sa_mask);
    sa.sa_sigaction = &segv_handler;
    if (sigaction(SIGSEGV, &sa, NULL) == -1) {
        fprintf(stderr, "failed to setup SIGSEGV handler\n");
    }
}

/*
 * Search a window of /dev/kmem for a byte pattern.
 *
 * start_address  offset into /dev/kmem where the search starts
 * area_len       number of bytes to search
 * pattern        bytes to look for
 * pattern_len    length of pattern in bytes
 *
 * Returns the address of the first match *inside this process's mapping*
 * (as the original code did), 0 when nothing matched, or (unsigned long)-1
 * when /dev/kmem could not be opened or mapped.  On a match the mapping is
 * left in place so the returned address stays readable; the descriptor is
 * always closed and the previous SIGSEGV disposition is always restored.
 *
 * Pages that fault on read are skipped: the scan resumes at the first byte
 * of the page following the candidate address.
 */
unsigned long search_kernel_memory_area(unsigned long start_address, size_t area_len, const void* pattern, size_t pattern_len)
{
    int fd = open("/dev/kmem", O_RDONLY);
    if (fd < 0)
    {
        perror("open /dev/kmem failed");
        return (unsigned long)-1;
    }

    unsigned long page_size = sysconf(_SC_PAGESIZE);
    unsigned long page_aligned_offset = (start_address / page_size) * page_size;
    unsigned long offset_into_page = start_address - page_aligned_offset;

    /* mmap() takes a length in bytes, and the mapping must also cover the
     * bytes between the page boundary and start_address. */
    size_t map_len = ((offset_into_page + area_len + page_size - 1) / page_size) * page_size;

    char* kernel_mem = mmap(NULL, map_len, PROT_READ, MAP_SHARED,
                            fd, (off_t)page_aligned_offset);
    if (kernel_mem == MAP_FAILED)
    {
        perror("mmap failed");
        close(fd);
        return (unsigned long)-1;
    }

    /* mlock() returns 0 on success.  Locking is not required for the scan
     * itself, so a failure is reported and the search carries on. */
    if (mlock(kernel_mem, map_len) != 0)
    {
        perror("mlock failed");
    }

    /* Remember the caller's SIGSEGV disposition so it can be put back;
     * otherwise a later fault elsewhere would jump into a dead stack frame. */
    struct sigaction previous_sa;
    int have_previous = (sigaction(SIGSEGV, NULL, &previous_sa) == 0);
    setup_segv_handler();

    const unsigned char* needle = (const unsigned char*)pattern;
    unsigned long start_area_address = (unsigned long)kernel_mem + offset_into_page;
    unsigned long end_area_address = start_area_address + area_len - pattern_len + 1;

    /* volatile: modified after sigsetjmp() and read again after siglongjmp(). */
    volatile unsigned long addr = start_area_address;
    volatile int found = 0;

    if (sigsetjmp(restore_point, 1) != 0)
    {
        /* A read faulted: continue with the first byte of the next page. */
        addr = (addr / page_size + 1) * page_size;
    }

    for (; addr < end_area_address; addr++)
    {
        const volatile unsigned char* kmp = (const volatile unsigned char*)addr;
        size_t index;

        for (index = 0; index < pattern_len; index++)
        {
            if (kmp[index] != needle[index])
            {
                break;
            }
        }
        if (index >= pattern_len)
        {
            found = 1;
            break;
        }
    }

    if (have_previous)
    {
        sigaction(SIGSEGV, &previous_sa, NULL);
    }
    close(fd);

    if (found)
    {
        return addr;
    }
    munmap(kernel_mem, map_len);
    return 0;
}
I realize I can use functions like memcmp to avoid programming the matching part directly (I did this initially), but I subsequently wanted to ensure the finest-grained control for recovering from the faults so I could see exactly what was happening.
I scoured the Internet to find information about this behavior, and came up empty. The linux system I am running this under is arm 3.12.30.
If what I am trying to do is not possible under linux, is there some way I can get the current state of the kernel pages from user space (which would allow me to avoid trying to search pages that are inaccessible.) I searched for calls that might provide such information, but also came up empty.
Thanks for your help!
While longjmp is perfectly allowed to be used in a signal handler (the function is known as async-signal-safe, see man signal-safety) and effectively exits from the signal handling, it doesn't restore the signal mask. The mask is automatically modified when the signal handler is called, so that a new SIGSEGV signal is blocked and cannot interrupt the handler.
While one may restore the signal mask manually, it is better (and simpler) to use the siglongjmp function instead: in addition to the effect of longjmp, it also restores the signal mask. Of course, in that case the sigsetjmp function should be used instead of setjmp:
// restore_point must be declared as sigjmp_buf for these two calls.
// ... in main() function
if(sigsetjmp(restore_point, 1)) // Aside from other things, store signal mask
// ...
// ... in the signal handler
siglongjmp(restore_point, 1); // Second argument is the value sigsetjmp() returns; also restores the signal mask as it was at the sigsetjmp() call
I am currently developing with an RTOS (CMSIS RTOS) on the STM32 Nucleo L053R8 kit, and I have an issue related to running multiple tasks.
I create 4 tasks (task_1, task_2, task_3, task_4); however, only 3 of them run.
This is part of my code:
#include "main.h"
#include "stm32l0xx_hal.h"
#include "cmsis_os.h"
/* Mutex taken by every task around its printf call; created in main(). */
osMutexId stdio_mutex;
osMutexDef(stdio_mutex);
/*
 * Entry point as quoted in the post.  The "....." line stands for code the
 * author left out, so this listing is not complete.
 *
 * Creates stdio_mutex and then four threads at osPriorityNormal.  The last
 * argument of osThreadDef is 128 for tasks 1-3 and 600 for task 4.
 */
int main(void){
.....
stdio_mutex = osMutexCreate(osMutex(stdio_mutex));
/* NOTE(review): every osThreadCreate result is stored in the same
 * defaultTaskHandle and never checked -- a failed creation would go
 * unnoticed here; confirm against the CMSIS-RTOS return-value contract. */
osThreadDef(defaultTask_1, StartDefaultTask_1, osPriorityNormal, 0, 128);
defaultTaskHandle = osThreadCreate(osThread(defaultTask_1), NULL);
osThreadDef(defaultTask_2, StartDefaultTask_2, osPriorityNormal, 0, 128);
defaultTaskHandle = osThreadCreate(osThread(defaultTask_2), NULL);
osThreadDef(defaultTask_3, StartDefaultTask_3, osPriorityNormal, 0, 128);
defaultTaskHandle = osThreadCreate(osThread(defaultTask_3), NULL);
/* Task 4 is the only one given a larger size argument (600). */
osThreadDef(defaultTask_4, StartDefaultTask_4, osPriorityNormal, 0, 600);
defaultTaskHandle = osThreadCreate(osThread(defaultTask_4), NULL);
}
/* Task 1: forever print this function's name while holding stdio_mutex,
 * then call osDelay(1000). */
void StartDefaultTask_1(void const * argument){
    (void)argument; /* not used by this task */
    while (1) {
        osMutexWait(stdio_mutex, osWaitForever);
        printf("%s\n\r", __func__);
        osMutexRelease(stdio_mutex);
        osDelay(1000);
    }
}
/* Task 2: forever print this function's name while holding stdio_mutex,
 * then call osDelay(1000). */
void StartDefaultTask_2(void const * argument){
    (void)argument; /* not used by this task */
    while (1) {
        osMutexWait(stdio_mutex, osWaitForever);
        printf("%s\n\r", __func__);
        osMutexRelease(stdio_mutex);
        osDelay(1000);
    }
}
/* Task 3: forever print this function's name while holding stdio_mutex,
 * then call osDelay(1000). */
void StartDefaultTask_3(void const * argument){
    (void)argument; /* not used by this task */
    while (1) {
        osMutexWait(stdio_mutex, osWaitForever);
        printf("%s\n\r", __func__);
        osMutexRelease(stdio_mutex);
        osDelay(1000);
    }
}
/* Task 4: forever print this function's name while holding stdio_mutex,
 * then call osDelay(1000). */
void StartDefaultTask_4(void const * argument){
    (void)argument; /* not used by this task */
    while (1) {
        osMutexWait(stdio_mutex, osWaitForever);
        printf("%s\n\r", __func__);
        osMutexRelease(stdio_mutex);
        osDelay(1000);
    }
}
this is reslut in console (uart):
When I change the stack size for task 4 from 600 to 128, as below:
osThreadDef(defaultTask_4, StartDefaultTask_4, osPriorityNormal, 0, 128);
defaultTaskHandle = osThreadCreate(osThread(defaultTask_4), NULL);
then no task runs at all.
Actually, I want to create many threads for my application; however, this issue makes that difficult to implement.
Could you let me know the root cause of the problem, and how to resolve it?
Thank you advance!!
There is no common easy method of the stack calculation. It depends on many factors.
I would suggest avoiding stack-greedy functions like printf, scanf, etc. Write your own ones: not as "smart" and universal, but less greedy for resources.
Avoid large local variables. Be very careful when you allocate the memory
Following your suggestions, I checked with the debugger and saw that the root cause is that the heap size is too small.
I resolve by 2 method
increase heap size: #define configTOTAL_HEAP_SIZE ((size_t)5120)
decrease stack size: #define configMINIMAL_STACK_SIZE ((uint16_t)64)
osThreadDef(defaultTask_6, StartDefaultTask_6, osPriorityNormal, 0, 64);
Do you know how to determine the maximum heap size? Please let me know.
Thank you so much
The libavcodec documentation is not very specific about when to free allocated data and how to free it. After reading through documentation and examples, I've put together the sample program below. There are some specific questions inlined in the source but my general question is, am I freeing all memory properly in the code below? I realize the program below doesn't do any cleanup after errors -- the focus is on final cleanup.
The testfile() function is the one in question.
extern "C" {
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
}
#include <cstdio>
using namespace std;
// Print the failed expression together with libav's description of the
// error code to stderr, then terminate the process with exit status 2.
void AVFAIL (int code, const char *what) {
    char description[500];
    av_strerror(code, description, sizeof description);
    fprintf(stderr, "failed: %s\nerror: %s\n", what, description);
    exit(2);
}
// AVCHECK(f): evaluate f once; if the result is negative, report it through
// AVFAIL with the expression text.  The temporary has a macro-specific name
// so an expression that itself mentions a variable called `e` is not
// shadowed by it.
#define AVCHECK(f) do { int avcheck_rc_ = (f); if (avcheck_rc_ < 0) AVFAIL(avcheck_rc_, #f); } while (0)
// AVCHECKPTR(p, f): assign f to p; if the result is null, report
// AVERROR_UNKNOWN through AVFAIL.  p is parenthesised so any lvalue
// expression can be passed.
#define AVCHECKPTR(p,f) do { (p) = (f); if (!(p)) AVFAIL(AVERROR_UNKNOWN, #f); } while (0)
void testfile (const char *filename) {
AVFormatContext *format;
unsigned streamIndex;
AVStream *stream = NULL;
AVCodec *codec;
SwsContext *sws;
AVPacket packet;
AVFrame *rawframe;
AVFrame *rgbframe;
unsigned char *rgbdata;
av_register_all();
// load file header
AVCHECK(av_open_input_file(&format, filename, NULL, 0, NULL));
AVCHECK(av_find_stream_info(format));
// find video stream
for (streamIndex = 0; streamIndex < format->nb_streams && !stream; ++ streamIndex)
if (format->streams[streamIndex]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
stream = format->streams[streamIndex];
if (!stream) {
fprintf(stderr, "no video stream\n");
exit(2);
}
// initialize codec
AVCHECKPTR(codec, avcodec_find_decoder(stream->codec->codec_id));
AVCHECK(avcodec_open(stream->codec, codec));
int width = stream->codec->width;
int height = stream->codec->height;
// initialize frame buffers
int rgbbytes = avpicture_get_size(PIX_FMT_RGB24, width, height);
AVCHECKPTR(rawframe, avcodec_alloc_frame());
AVCHECKPTR(rgbframe, avcodec_alloc_frame());
AVCHECKPTR(rgbdata, (unsigned char *)av_mallocz(rgbbytes));
AVCHECK(avpicture_fill((AVPicture *)rgbframe, rgbdata, PIX_FMT_RGB24, width, height));
// initialize sws (for conversion to rgb24)
AVCHECKPTR(sws, sws_getContext(width, height, stream->codec->pix_fmt, width, height, PIX_FMT_RGB24, SWS_FAST_BILINEAR, NULL, NULL, NULL));
// read all frames fromfile
while (av_read_frame(format, &packet) >= 0) {
int frameok = 0;
if (packet.stream_index == (int)streamIndex)
AVCHECK(avcodec_decode_video2(stream->codec, rawframe, &frameok, &packet));
av_free_packet(&packet); // Q: is this necessary or will next av_read_frame take care of it?
if (frameok) {
sws_scale(sws, rawframe->data, rawframe->linesize, 0, height, rgbframe->data, rgbframe->linesize);
// would process rgbframe here
}
// Q: is there anything i need to free here?
}
// CLEANUP: Q: am i missing anything / doing anything unnecessary?
av_free(sws); // Q: is av_free all i need here?
av_free_packet(&packet); // Q: is this necessary (av_read_frame has returned < 0)?
av_free(rgbframe);
av_free(rgbdata);
av_free(rawframe); // Q: i can just do this once at end, instead of in loop above, right?
avcodec_close(stream->codec); // Q: do i need av_free(codec)?
av_close_input_file(format); // Q: do i need av_free(format)?
}
// Program entry: expects exactly one argument, the media file to decode.
int main (int argc, char **argv) {
    if (argc == 2) {
        testfile(argv[1]);
        return 0;
    }
    fprintf(stderr, "usage: %s filename\n", argv[0]);
    return 1;
}
Specific questions:
Is there anything I need to free in the frame processing loop; or will libav take care of memory management there for me?
Is av_free the correct way to free an SwsContext?
The frame loop exits when av_read_frame returns < 0. In that case, do I still need to av_free_packet when it's done?
Do I need to call av_free_packet every time through the loop or will av_read_frame free/reuse the old AVPacket automatically?
I can just av_free the AVFrames at the end of the loop instead of reallocating them each time through, correct? It seems to be working fine, but I'd like to confirm that it's working because it's supposed to, rather than by luck.
Do I need to av_free(codec) the AVCodec or do anything else after avcodec_close on the AVCodecContext?
Do I need to av_free(format) the AVFormatContext or do anything else after av_close_input_file?
I also realize that some of these functions are deprecated in current versions of libav. For reasons that are not relevant here, I have to use them.
Those functions are not just deprecated, they've been removed some time ago. So you should really consider upgrading.
Anyway, as for your questions:
1) no, nothing more to free
2) no, use sws_freeContext()
3) no, if av_read_frame() returns an error then the packet does not contain any valid data
4) yes you have to free the packet after you're done with it and before next av_read_frame() call
5) yes, it's perfectly valid
6) no, the codec context itself is allocated by libavformat so av_close_input_file() is
responsible for freeing it. So nothing more for you to do.
7) no, av_close_input_file() frees the format context so there should be nothing more for you to do.
Update: I've reproduced the problem! Scroll lower to see the code.
Quick Notes
My Core i5 CPU has 2 cores, hyperthreading.
If I call SetProcessAffinityMask(GetCurrentProcess(), 1), everything is fine, even though the program is still multithreaded.
If I don't do that, and the program is running on Windows XP (it's fine on Windows 7 x64!), my GUI starts locking up for several seconds while I'm scrolling the list view and the icons are loading.
The Problem
Basically, when I run the program posted below (a reduced version of my original code) on Windows XP (Windows 7 is fine), unless I force the same logical CPU for all my threads, the program UI starts lagging behind by half a second or so.
(Note: Lots of edits to this post here, as I investigated the problem further.)
Note that the number of threads is the same -- only the affinity mask is different.
I've tried this out using two different methods of message-passing: the built-in GetMessage as well as my own BackgroundWorker.
The result? BackgroundWorker benefits from affinity for 1 logical CPU (virtually no lag), whereas GetMessage is completely hurt by this, (lag is now many seconds long).
I can't figure out why that would be happening -- shouldn't multiple CPUs work better than a single CPU?!
Why would there be such a lag, when the number of threads is the same?
More stats:
GetLogicalProcessorInformation returns:
0x0: {ProcessorMask=0x0000000000000003 Relationship=RelationProcessorCore ...}
0x1: {ProcessorMask=0x0000000000000003 Relationship=RelationCache ...}
0x2: {ProcessorMask=0x0000000000000003 Relationship=RelationCache ...}
0x3: {ProcessorMask=0x0000000000000003 Relationship=RelationCache ...}
0x4: {ProcessorMask=0x000000000000000f Relationship=RelationProcessorPackage ...}
0x5: {ProcessorMask=0x000000000000000c Relationship=RelationProcessorCore ...}
0x6: {ProcessorMask=0x000000000000000c Relationship=RelationCache ...}
0x7: {ProcessorMask=0x000000000000000c Relationship=RelationCache ...}
0x8: {ProcessorMask=0x000000000000000c Relationship=RelationCache ...}
0x9: {ProcessorMask=0x000000000000000f Relationship=RelationCache ...}
0xa: {ProcessorMask=0x000000000000000f Relationship=RelationNumaNode ...}
The Code
The code below should show this problem on Windows XP SP3.
(At least, it does on my computer!)
Compare these two:
Run the program normally, then scroll. You should see lag.
Run the program with the affinity command-line argument, then scroll. It should be almost completely smooth.
Why would this happen?
#define _WIN32_WINNT 0x502
#include <tchar.h>
#include <Windows.h>
#include <CommCtrl.h>
#pragma comment(lib, "kernel32.lib")
#pragma comment(lib, "comctl32.lib")
#pragma comment(lib, "user32.lib")
// Performance-counter reading taken at program start (set in _tmain); log
// lines print elapsed time relative to it.
LONGLONG startTick = 0;
// Current value of the high-resolution performance counter.
LONGLONG QPC()
{
    LARGE_INTEGER counter;
    QueryPerformanceCounter(&counter);
    return counter.QuadPart;
}
// Frequency of the high-resolution performance counter, in counts per second.
LONGLONG QPF()
{
    LARGE_INTEGER frequency;
    QueryPerformanceFrequency(&frequency);
    return frequency.QuadPart;
}
// When true, the window procedure and the callbacks print timing traces.
bool logging = false;
// Selects which mechanism hands work to the background thread.
bool const useWindowMessaging = true; // GetMessage() or BackgroundWorker?
// When true, WM_CREATE starts a timer that scrolls the list automatically.
bool const autoScroll = false; // for testing
// Runs queued callables on a dedicated thread.
//
// Tasks are kept in a fixed-capacity array used as a stack (the most recently
// added task runs first), protected by a critical section.  A semaphore is
// released once per added task to wake the worker thread.
class BackgroundWorker
{
    // Type-erased task.
    struct Thunk
    {
        virtual void operator()() = 0;
        virtual ~Thunk() { }
    };

    // Scoped holder of a CRITICAL_SECTION.
    class CSLock
    {
        CRITICAL_SECTION& cs;
    public:
        CSLock(CRITICAL_SECTION& criticalSection)
            : cs(criticalSection)
        { EnterCriticalSection(&this->cs); }
        ~CSLock() { LeaveCriticalSection(&this->cs); }
    };

    // Minimal owning pointer: deletes the pointee on destruction.  Copying is
    // disabled by keeping the copy operations private.
    template<typename T>
    class ScopedPtr
    {
        T *p;
        ScopedPtr(ScopedPtr const &) { }
        ScopedPtr &operator =(ScopedPtr const &) { return *this; }
    public:
        ScopedPtr() : p(NULL) { }
        explicit ScopedPtr(T *p) : p(p) { }
        ~ScopedPtr() { delete p; }
        T *operator ->() { return p; }
        T &operator *() { return *p; }
        // Adopts a pointer; only valid while the holder is still empty.
        ScopedPtr &operator =(T *p)
        {
            if (this->p != NULL) { __debugbreak(); }
            this->p = p;
            return *this;
        }
        operator T *const &() { return this->p; }
    };

    Thunk **const todo;
    size_t nToDo;
    CRITICAL_SECTION criticalSection;
    DWORD tid;
    HANDLE hThread, hSemaphore;
    volatile bool stop;
    static size_t const MAX_TASKS = 1 << 18;  // big enough for testing

    static DWORD CALLBACK entry(void *arg)
    { return ((BackgroundWorker *)arg)->process(); }

public:
    // The task array is value-initialised so every slot starts out NULL;
    // add() relies on that when it checks the slot it is about to fill.
    // Initialisers are listed in member declaration order, which is the
    // order in which they actually run.  The thread is created suspended and
    // resumed only after the critical section exists.
    BackgroundWorker()
        : todo(new Thunk *[MAX_TASKS]()), nToDo(0), tid(0),
          hThread(CreateThread(NULL, 0, entry, this, CREATE_SUSPENDED, &tid)),
          hSemaphore(CreateSemaphore(NULL, 0, 1 << 30, NULL)),
          stop(false)
    {
        InitializeCriticalSection(&this->criticalSection);
        ResumeThread(this->hThread);
    }

    // Discards pending tasks, wakes the worker so it can see `stop`, waits
    // for it to exit and releases all resources.
    ~BackgroundWorker()
    {
        // Clear all the tasks
        this->stop = true;
        this->clear();
        LONG prev;
        if (!ReleaseSemaphore(this->hSemaphore, 1, &prev) ||
            WaitForSingleObject(this->hThread, INFINITE) != WAIT_OBJECT_0)
        { __debugbreak(); }
        CloseHandle(this->hSemaphore);
        CloseHandle(this->hThread);
        DeleteCriticalSection(&this->criticalSection);
        delete [] this->todo;
    }

    // Deletes every pending task.  Each slot is reset to NULL so a later
    // add() does not trip over a dangling pointer.
    void clear()
    {
        CSLock lock(this->criticalSection);
        while (this->nToDo > 0)
        {
            --this->nToDo;
            delete this->todo[this->nToDo];
            this->todo[this->nToDo] = NULL;
        }
    }

    // Worker thread body: wait for the semaphore, pop one task (if any is
    // left) and run it outside the lock.  Returns ERROR_CANCELLED when
    // stopped, otherwise the failing wait result.
    unsigned int process()
    {
        DWORD result;
        while ((result = WaitForSingleObject(this->hSemaphore, INFINITE))
            == WAIT_OBJECT_0)
        {
            if (this->stop) { result = ERROR_CANCELLED; break; }
            ScopedPtr<Thunk> next;
            {
                CSLock lock(this->criticalSection);
                if (this->nToDo > 0)
                {
                    next = this->todo[--this->nToDo];
                    this->todo[this->nToDo] = NULL;  // for debugging
                }
            }
            if (next) { (*next)(); }
        }
        return result;
    }

    // Queues a copy of `func` to be invoked on the worker thread.
    template<typename Func>
    void add(Func const &func)
    {
        CSLock lock(this->criticalSection);
        struct FThunk : public virtual Thunk
        {
            Func func;
            FThunk(Func const &func) : func(func) { }
            void operator()() { this->func(); }
        };
        DWORD exitCode;
        if (GetExitCodeThread(this->hThread, &exitCode) &&
            exitCode == STILL_ACTIVE)
        {
            // Never write past the end of the array when the queue is full.
            if (this->nToDo >= MAX_TASKS) { __debugbreak(); /*too many*/ return; }
            if (this->todo[this->nToDo] != NULL) { __debugbreak(); }
            this->todo[this->nToDo++] = new FThunk(func);
            LONG prev;
            if (!ReleaseSemaphore(this->hSemaphore, 1, &prev))
            { __debugbreak(); }
        }
        else { __debugbreak(); }
    }
};
// Window procedure of the test window.
//
// WM_CREATE  builds a report-mode list view with 2048 items whose image is
//            I_IMAGECALLBACK, and optionally starts the auto-scroll timer.
// WM_TIMER   scrolls the list by one client height; stops the timer when
//            ListView_Scroll fails.
// WM_NULL    used here as the "icon ready" message: lParam is the item index.
// WM_NOTIFY  on LVN_GETDISPINFO for an I_IMAGECALLBACK item, forwards the
//            item index to the background thread / worker.
// WM_CLOSE   posts WM_QUIT.
// Every message, handled or not, is finally passed to DefWindowProc.
LRESULT CALLBACK MyWindowProc(
HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM lParam)
{
enum { IDC_LISTVIEW = 101 };
switch (uMsg)
{
case WM_CREATE:
{
// The list view fills the whole client area of the parent.
RECT rc; GetClientRect(hWnd, &rc);
HWND const hWndListView = CreateWindowEx(
WS_EX_CLIENTEDGE, WC_LISTVIEW, NULL,
WS_CHILDWINDOW | WS_VISIBLE | LVS_REPORT |
LVS_SHOWSELALWAYS | LVS_SINGLESEL | WS_TABSTOP,
rc.left, rc.top, rc.right - rc.left, rc.bottom - rc.top,
hWnd, (HMENU)IDC_LISTVIEW, NULL, NULL);
int const cx = GetSystemMetrics(SM_CXSMICON),
cy = GetSystemMetrics(SM_CYSMICON);
// Small-icon image list holding a single icon (index 0).
HIMAGELIST const hImgList =
ImageList_Create(
GetSystemMetrics(SM_CXSMICON),
GetSystemMetrics(SM_CYSMICON),
ILC_COLOR32, 1024, 1024);
ImageList_AddIcon(hImgList, (HICON)LoadImage(
NULL, IDI_INFORMATION, IMAGE_ICON, cx, cy, LR_SHARED));
LVCOLUMN col = { LVCF_TEXT | LVCF_WIDTH, 0, 500, TEXT("Name") };
ListView_InsertColumn(hWndListView, 0, &col);
ListView_SetExtendedListViewStyle(hWndListView,
LVS_EX_DOUBLEBUFFER | LVS_EX_FULLROWSELECT | LVS_EX_GRIDLINES);
ListView_SetImageList(hWndListView, hImgList, LVSIL_SMALL);
// Insert 2048 items; each asks for its image through LVN_GETDISPINFO.
for (int i = 0; i < (1 << 11); i++)
{
TCHAR text[128]; _stprintf(text, _T("Item %d"), i);
LVITEM item =
{
LVIF_IMAGE | LVIF_TEXT, i, 0, 0, 0,
text, 0, I_IMAGECALLBACK
};
ListView_InsertItem(hWndListView, &item);
}
if (autoScroll)
{
SetTimer(hWnd, 0, 1, NULL);
}
break;
}
case WM_TIMER:
{
// Auto-scroll step: advance by one client height per tick.
HWND const hWndListView = GetDlgItem(hWnd, IDC_LISTVIEW);
RECT rc; GetClientRect(hWndListView, &rc);
if (!ListView_Scroll(hWndListView, 0, rc.bottom - rc.top))
{
KillTimer(hWnd, 0);
}
break;
}
case WM_NULL:
{
// Result coming back from the background side: lParam is the item index.
HWND const hWndListView = GetDlgItem(hWnd, IDC_LISTVIEW);
int const iItem = (int)lParam;
if (logging)
{
_tprintf(_T("#%I64lld ms:")
_T(" Received: #%d\n"),
(QPC() - startTick) * 1000 / QPF(), iItem);
}
// Replace the callback image with image 0, then force a redraw of the item.
int const iImage = 0;
LVITEM const item = {LVIF_IMAGE, iItem, 0, 0, 0, NULL, 0, iImage};
ListView_SetItem(hWndListView, &item);
// NOTE(review): one of the answers in this thread attributes the lag to
// this explicit ListView_Update call -- confirm before relying on it.
ListView_Update(hWndListView, iItem);
break;
}
case WM_NOTIFY:
{
LPNMHDR const pNMHDR = (LPNMHDR)lParam;
switch (pNMHDR->code)
{
case LVN_GETDISPINFO:
{
NMLVDISPINFO *const pInfo = (NMLVDISPINFO *)lParam;
// Task run by BackgroundWorker: posts the item index back to the window.
struct Callback
{
HWND hWnd;
int iItem;
void operator()()
{
if (logging)
{
_tprintf(_T("#%I64lld ms: Sent: #%d\n"),
(QPC() - startTick) * 1000 / QPF(),
iItem);
}
PostMessage(hWnd, WM_NULL, 0, iItem);
}
};
if (pInfo->item.iImage == I_IMAGECALLBACK)
{
if (useWindowMessaging)
{
// GWLP_USERDATA holds the background thread id in this mode.
DWORD const tid =
(DWORD)GetWindowLongPtr(hWnd, GWLP_USERDATA);
PostThreadMessage(
tid, WM_NULL, 0, pInfo->item.iItem);
}
else
{
// GWLP_USERDATA holds a BackgroundWorker pointer in this mode.
Callback callback = { hWnd, pInfo->item.iItem };
if (logging)
{
_tprintf(_T("#%I64lld ms: Queued: #%d\n"),
(QPC() - startTick) * 1000 / QPF(),
pInfo->item.iItem);
}
((BackgroundWorker *)
GetWindowLongPtr(hWnd, GWLP_USERDATA))
->add(callback);
}
}
break;
}
}
break;
}
case WM_CLOSE:
{
PostQuitMessage(0);
break;
}
}
return DefWindowProc(hWnd, uMsg, wParam, lParam);
}
// Thread procedure for the message-based worker: every WM_NULL thread
// message is posted on to the window passed in lpParameter.  The loop ends
// when GetMessage stops returning a positive value or WM_QUIT is seen.
DWORD WINAPI BackgroundWorkerThread(LPVOID lpParameter)
{
    HWND const target = (HWND)lpParameter;
    for (;;)
    {
        MSG incoming;
        if (GetMessage(&incoming, NULL, 0, 0) <= 0) { break; }
        if (incoming.message == WM_QUIT) { break; }
        if (incoming.message != WM_NULL) { continue; }
        PostMessage(target, incoming.message, incoming.wParam, incoming.lParam);
    }
    return 0;
}
int _tmain(int argc, LPTSTR argv[])
{
startTick = QPC();
bool const affinity = argc >= 2 && _tcsicmp(argv[1], _T("affinity")) == 0;
if (affinity)
{ SetProcessAffinityMask(GetCurrentProcess(), 1 << 0); }
bool const log = logging; // disable temporarily
logging = false;
WNDCLASS wndClass =
{
0, &MyWindowProc, 0, 0, NULL, NULL, LoadCursor(NULL, IDC_ARROW),
GetSysColorBrush(COLOR_3DFACE), NULL, TEXT("MyClass")
};
HWND const hWnd = CreateWindow(
MAKEINTATOM(RegisterClass(&wndClass)),
affinity ? TEXT("Window (1 CPU)") : TEXT("Window (All CPUs)"),
WS_OVERLAPPEDWINDOW | WS_VISIBLE, CW_USEDEFAULT, CW_USEDEFAULT,
CW_USEDEFAULT, CW_USEDEFAULT, NULL, NULL, NULL, NULL);
BackgroundWorker iconLoader;
DWORD tid = 0;
if (useWindowMessaging)
{
CreateThread(NULL, 0, &BackgroundWorkerThread, (LPVOID)hWnd, 0, &tid);
SetWindowLongPtr(hWnd, GWLP_USERDATA, tid);
}
else { SetWindowLongPtr(hWnd, GWLP_USERDATA, (LONG_PTR)&iconLoader); }
MSG msg;
while (GetMessage(&msg, NULL, 0, 0) > 0)
{
if (!IsDialogMessage(hWnd, &msg))
{
TranslateMessage(&msg);
DispatchMessage(&msg);
}
if (msg.message == WM_TIMER ||
!PeekMessage(&msg, NULL, 0, 0, PM_NOREMOVE))
{ logging = log; }
}
PostThreadMessage(tid, WM_QUIT, 0, 0);
return 0;
}
Based on the inter-thread timings you posted at http://ideone.com/fa2fM, it looks like there is a fairness issue at play here. Based solely on this assumption, here is my reasoning as to the apparent cause of the perceived lag and a potential solution to the problem.
It looks like there is a large number of LVN_GETDISPINFO messages being generated and processed on one thread by the window proc, and while the background worker thread is able to keep up and post messages back to the window at the same rate, the WM_NULL messages it posts are so far back in the queue that it takes time before they get handled.
When you set the processor affinity mask, you introduce more fairness into the system because the same processor must service both threads, which will limit the rate at which LVN_GETDISPINFO messages are generated relative to the non-affinity case. This means that the window proc message queue is likely not as deep when you post your WM_NULL messages, which in turn means that they will be processed 'sooner'.
It seems that you need to somehow bypass the queueing effect. Using SendMessage, SendMessageCallback or SendNotifyMessage instead of PostMessage may be ways to do this. In the SendMessage case, your worker thread will block until the window proc thread is finished its current message and processes the sent WM_NULL message, but you will be able to inject your WM_NULL messages more evenly into the message processing flow. See this page for an explanation of queued vs. non-queued message handling.
If you choose to use SendMessage, but you don't want to limit the rate at which you can obtain icons due to the blocking nature of SendMessage, then you can use a third thread. Your I/O thread would post messages to the third thread, while the third thread uses SendMessage to inject icon updates into the UI thread. In this fashion, you have control of the queue of satisfied icon requests, instead of interleaving them into the window proc message queue.
As for the difference in behaviour between Win7 and WinXP, there may be a number of reasons why you don't appear to see this effect on Win7. It could be that the list view common control is implemented differently and limits the rate at which LVN_GETDISPINFO messages are generated. Or perhaps the thread scheduling mechanism in Win7 switches thread contexts more frequently or more fairly.
EDIT:
Based on your latest change, try the following:
...
struct Callback
{
HWND hWnd;
int iItem;
void operator()()
{
if (logging)
{
_tprintf(_T("#%I64lld ms: Sent: #%d\n"),
(QPC() - startTick) * 1000 / QPF(),
iItem);
}
SendNotifyMessage(hWnd, WM_NULL, 0, iItem); // <----
}
};
...
// Thread procedure for the message-based worker.  This variant forwards each
// WM_NULL thread message to the window with SendNotifyMessage (the original
// code used PostMessage).
DWORD WINAPI BackgroundWorkerThread(LPVOID lpParameter)
{
    HWND const target = (HWND)lpParameter;
    for (;;)
    {
        MSG incoming;
        if (GetMessage(&incoming, NULL, 0, 0) <= 0) { break; }
        if (incoming.message == WM_QUIT) { break; }
        if (incoming.message != WM_NULL) { continue; }
        SendNotifyMessage(target, incoming.message, incoming.wParam, incoming.lParam);
    }
    return 0;
}
EDIT 2:
After establishing that the LVN_GETDISPINFO messages are being delivered using SendMessage instead of PostMessage, we can't use SendMessage ourselves to bypass them.
Still proceeding on the assumption that there is a glut of messages being processed by the wndproc before the icon results are being sent back from the worker thread, we need another way to get those updates handled as soon as they are ready.
Here's the idea:
Worker thread places results in a synchronized queue-like data structure, and then posts (using PostMessage) a WM_NULL message to the wndproc (to ensure that the wndproc gets executed sometime in the future).
At the top of the wndproc (before the case statements), the UI thread checks the synchronized queue-like data structure to see if there are any results, and if so, removes one or more results from the queue-like data structure and processes them.
The issue has less to do with thread affinity and more to do with telling the listview that it needs to update the list item every time you update it. Because you do not add the LVIF_DI_SETITEM flag to pInfo->item.mask in your LVN_GETDISPINFO handler, and because you call ListView_Update manually, when you call ListView_Update, the list view invalidates any item that still has its iImage set to I_IMAGECALLBACK.
You can fix this in one of two ways (or a combination of both):
Remove ListView_Update from your WM_NULL handler. The list view will automatically redraw the items you set the image for in your WM_NULL handler when you set them, and it will not attempt to redraw items you haven't set the image for more than once.
Set LVIF_DI_SETITEM flag in pInfo->item.mask in your LVN_GETDISPINFO handler and set pInfo->item.iImage to a value that is not I_IMAGECALLBACK.
I repro'd similar awful behavior doing a full page scroll on Vista. Doing either of the above fixed the issue while still updating the icons asynchronously.
It's plausible that this is related to XP's hyper-threading/logical-core scheduling, and I will second IvoTops' suggestion to try this with hyper-threading disabled. Please try this and let us know.
Why? Because:
a) Logical cores offer bad parallelism for CPU bound tasks. Running multiple CPU bound threads on two logical HT cores on the same physical core is detrimental to performance. See for example, this intel paper - it explains how enabling HT might cause typical server threads to incur an increase in latency or processing time for each request (while improving net throughput.)
b) Windows 7 does indeed have some HT/SMT (symmetrical multi threading) scheduling improvements. Mark Russinovich's slides here mention this briefly. Although they claim that XP scheduler is SMT aware, the fact that Windows 7 explicitly fixes something around this, implies there could be something lacking in XP. So I'm guessing that the OS isn't setting the thread affinity to the second core appropriately. (perhaps because the second core might not be idle at the instant of scheduling your second thread, to speculate wildly).
You wrote "I just tried setting the CPU affinity of the process (or even the individual threads) to all potential combinations I could think of, on the same and on different logical CPUs".
Can we try to verify that the execution is actually on the second core, once you set this?
You can visually check this in task manager or perfmon/perf counters
Maybe post the code where you set the affinity of the threads. (I note that you are not checking the return value of SetProcessAffinityMask; do check that as well.)
If Windows perf counters dont help, Intel's VTune Performance Analyzer is helpful for exactly this kind of stuff.
I think you can force the thread affinity manually using task manager.
One more thing: Your core i5 is either Nehalem or SandyBridge micro-architecture. Nehalem and later HT implementation is significantly different from the prior generation architectures (Core,etc). In fact Microsoft recommended disabling HT for running Biztalk server on pre-Nehalem systems. So perhaps Windows XP does not handle the new HT architecture well.
This might be a hyperthreading bug. To check if that's what causing it run your faulty program with Hyperthreading turned off (in the bios you can usually switch it off). I have run into two issues in the last five years that only surfaced when hyperthreading was enabled.
I'm trying to write some code to communicate with wpa_supplicant using DBUS. As I'm working in an embedded system (ARM), I'd like to avoid the use of Python or the GLib. I'm wondering if I'm stupid because I really have the feeling that there is no nice and clear documentation about D-Bus. Even with the official one, I either find the documentation too high level, or the examples shown are using Glib! Documentation I've looked at: http://www.freedesktop.org/wiki/Software/dbus
I found a nice article about using D-Bus in C: http://www.matthew.ath.cx/articles/dbus
However, this article is pretty old and not complete enough! I also found the c++-dbus API but also here, I don't find ANY documentation! I've been digging into wpa_supplicant and NetworkManager source code but it's quite a nightmare! I've been looking into the "low-level D-Bus API" as well but this doesn't tell me how to extract a string parameter from a D-Bus message! http://dbus.freedesktop.org/doc/api/html/index.html
Here is some code I wrote to test a little but I really have trouble to extract string values. Sorry for the long source code but if someone want to try it ... My D-Bus configuration seems fine because it "already" catches "StateChanged" signals from wpa_supplicant but cannot print the state:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <signal.h>
#include <string.h>
#include <dbus/dbus.h>
//#include "wpa_supp_dbus.h"
/* Content of wpa_supp_dbus.h */
#define WPAS_DBUS_SERVICE "fi.epitest.hostap.WPASupplicant"
#define WPAS_DBUS_PATH "/fi/epitest/hostap/WPASupplicant"
#define WPAS_DBUS_INTERFACE "fi.epitest.hostap.WPASupplicant"
#define WPAS_DBUS_PATH_INTERFACES WPAS_DBUS_PATH "/Interfaces"
#define WPAS_DBUS_IFACE_INTERFACE WPAS_DBUS_INTERFACE ".Interface"
#define WPAS_DBUS_NETWORKS_PART "Networks"
#define WPAS_DBUS_IFACE_NETWORK WPAS_DBUS_INTERFACE ".Network"
#define WPAS_DBUS_BSSIDS_PART "BSSIDs"
#define WPAS_DBUS_IFACE_BSSID WPAS_DBUS_INTERFACE ".BSSID"
/*
 * Main-loop flag: non-zero while loop() should keep polling.  It is written
 * from a signal handler, so it is a volatile sig_atomic_t rather than a
 * plain int.
 */
volatile sig_atomic_t running = 1;

/* Signal handler: ask loop() to finish after its current iteration. */
void stopLoop(int sig)
{
    (void)sig;
    running = 0;
}
/* Placeholder, not implemented yet: currently does nothing.  Called once by
 * loop() before it starts polling. */
void sendScan()
{
// TODO !
}
/*
 * Poll the connection for incoming messages until `running` is cleared.
 *
 * For each message the interface and member are printed, then every
 * argument is visited; string arguments are extracted and printed, other
 * types are reported as not implemented.  When no message is pending the
 * loop sleeps for one second.
 *
 * conn  an open D-Bus connection
 */
void loop(DBusConnection* conn)
{
    DBusMessage* msg;
    DBusMessageIter args;
    int argType;
    const char* iface = 0;
    const char* member = 0;
    const char* strValue = 0;

    sendScan();
    while (running)
    {
        // non blocking read of the next available message
        dbus_connection_read_write(conn, 0);
        msg = dbus_connection_pop_message(conn);

        // loop again if we haven't read a message
        if (!msg)
        {
            printf("No message received, waiting a little ...\n");
            sleep(1);
            continue;
        }
        printf("Got a message, will analyze it ...\n");

        // Print the message interface and member; either may be absent
        iface = dbus_message_get_interface(msg);
        printf("Got message for interface %s\n", iface ? iface : "(none)");
        member = dbus_message_get_member(msg);
        if (member) printf("Got message member %s\n", member);

        // Check has argument
        if (!dbus_message_iter_init(msg, &args))
        {
            // fall through to the unref below instead of leaking the message
            printf("Message has no argument\n");
        }
        else
        {
            // Go through arguments
            do
            {
                argType = dbus_message_iter_get_arg_type(&args);
                if (argType == DBUS_TYPE_STRING)
                {
                    printf("Got string argument, extracting ...\n");
                    // For a string, get_basic stores a pointer to the text
                    // held inside the message; it must be received in a
                    // `const char *`, not copied into a char array, and it
                    // must not be freed or used after the message is
                    // unreferenced.
                    dbus_message_iter_get_basic(&args, &strValue);
                    printf("String value was %s\n", strValue);
                }
                else
                    printf("Arg type not implemented yet !\n");
            } while (dbus_message_iter_next(&args));
            printf("No more arguments!\n");
        }

        // free the message
        dbus_message_unref(msg);
    }
}
int main(int argc, char* argv[])
{
DBusError err;
DBusConnection* conn;
int ret;
char signalDesc[1024]; // Signal description as string
// Signal handling
signal(SIGKILL, stopLoop);
signal(SIGTERM, stopLoop);
// Initialize err struct
dbus_error_init(&err);
// connect to the bus
conn = dbus_bus_get(DBUS_BUS_SYSTEM, &err);
if (dbus_error_is_set(&err))
{
fprintf(stderr, "Connection Error (%s)\n", err.message);
dbus_error_free(&err);
}
if (!conn)
{
exit(1);
}
// request a name on the bus
ret = dbus_bus_request_name(conn, WPAS_DBUS_SERVICE, 0, &err);
if (dbus_error_is_set(&err))
{
fprintf(stderr, "Name Error (%s)\n", err.message);
dbus_error_free(&err);
}
/* Connect to signal */
// Interface signal ..
sprintf(signalDesc, "type='signal',interface='%s'",
WPAS_DBUS_IFACE_INTERFACE);
dbus_bus_add_match(conn, signalDesc, &err);
dbus_connection_flush(conn);
if (dbus_error_is_set(&err))
{
fprintf(stderr, "Match Error (%s)\n", err.message);
exit(1);
}
// Network signal ..
sprintf(signalDesc, "type='signal',interface='%s'",
WPAS_DBUS_IFACE_NETWORK);
dbus_bus_add_match(conn, signalDesc, &err);
dbus_connection_flush(conn);
if (dbus_error_is_set(&err))
{
fprintf(stderr, "Match Error (%s)\n", err.message);
exit(1);
}
// Bssid signal ..
sprintf(signalDesc, "type='signal',interface='%s'",
WPAS_DBUS_IFACE_BSSID);
dbus_bus_add_match(conn, signalDesc, &err);
dbus_connection_flush(conn);
if (dbus_error_is_set(&err))
{
fprintf(stderr, "Match Error (%s)\n", err.message);
exit(1);
}
// Do main loop
loop(conn);
// Main loop exited
printf("Main loop stopped, exiting ...\n");
dbus_connection_close(conn);
return 0;
}
Any pointer to any nice, complete, low-level C tutorial is strongly appreciated! I'm also planning to do some remote method call, so if the tutorial covers this subject it would be great! Saying I'm not very smart because I don't get it with the official tutorial is also appreciated :-p!
Or is there another way to communicate with wpa_supplicant (except using wpa_cli)?
EDIT 1:
Using 'qdbusviewer' and its introspection capability helped me a lot in discovering what wpa_supplicant exposes over D-Bus and how it works. Hoping that this will help someone else!
Edit 2:
Will probably come when I'll find a way to read string values on D-Bus!
You have given up the tools that would help you to learn D-Bus more easily and are using the low level libdbus implementation, so maybe you deserve to be in pain. BTW, are you talking about ARM, like a cell phone ARM ? With maybe 500 Mhz and 256 MB RAM ? In this case the processor is well suited to using glib, Qt or even python. And D-Bus is most useful when you're writing asynchronous event driven code, with an integrated main loop, for example from glib, even when you're using the low level libdbus (it has functions to connect to the glib main loop, for example).
Since you're using the low level library, then documentation is what you already have:
http://dbus.freedesktop.org/doc/api/html/index.html
Also, libdbus source code is also part of the documentation:
http://dbus.freedesktop.org/doc/api/html/files.html
The main entry point for the documentation is the Modules page (in particular, the public API section):
http://dbus.freedesktop.org/doc/api/html/modules.html
For message handling, the section DBusMessage is the relevant one:
DBusMessage
There you have the documentation for functions that parse item values. In your case, you started with a dbus_message_iter_get_basic. As described in the docs, retrieving the string requires a const char ** variable, since the returned value will point to the pre-allocated string in the received message:
So for int32 it should be a "dbus_int32_t*" and for string a "const char**". The returned value is by reference and should not be freed.
So you can't define an array, because libdbus won't copy the text to your array. If you need to save the string, first get the constant string reference, then strcpy to your own array.
Then you tried to get a fixed array without moving the iterator. You need a call to the next iterator (dbus_message_iter_next) between the basic string and the fixed array. Same right before recursing into the sub iterator.
Finally, you don't call get_array_len to get the number of elements on the array. From the docs, it only returns byte counts. Instead you loop over the sub iterator using iter_next the same way you should have done with the main iterator. After you have iterated past the end of the array, dbus_message_iter_get_arg_type will return DBUS_TYPE_INVALID.
For more info, read the reference manual, don't look for a tutorial. Or just use a reasonable d-bus implementation:
https://developer.gnome.org/gio/2.36/gdbus-codegen.html
GIO's GDBus automatically creates wrappers for your d-bus calls.
http://qt-project.org/doc/qt-4.8/intro-to-dbus.html
http://dbus.freedesktop.org/doc/dbus-python/doc/tutorial.html
etc.
You don't need to use or understand the workings of D-Bus if you just need to write a C program that communicates with wpa_supplicant. I reverse engineered wpa_cli's source code: I went through its implementation and used the functions provided in wpa_ctrl.h/c. This implementation takes care of everything. You can use or modify whatever you want, build your executable and you're done!
Here's the official link to wpa_supplicant's ctrl_interface:
http://hostap.epitest.fi/wpa_supplicant/devel/ctrl_iface_page.html
I doubt this answer will still be relevant to the author of this question,
but for anybody who stumbles upon this like I did:
The situation is now better than all those years ago if you don't want to include GTK/QT in your project to access dbus.
There is dbus API in Embedded Linux Library by Intel (weird I remember it being open, maybe it is just for registered users now?)
and systemd sd-bus library now offers public API. You probably run systemd anyway unless you have a really constrained embedded system.
I have worked with GDbus, dbus-cpp and sd-bus and although I wanted a C++ library,
I found sd-bus to be the simplest and the least problematic experience.
I did not try its C++ bindings but they also look nice
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <systemd/sd-bus.h>
// Destination bus name, object path and interface name used for every
// method call made on wpa_supplicant's root object
const char* wpa_service = "fi.w1.wpa_supplicant1";
const char* wpa_root_obj_path = "/fi/w1/wpa_supplicant1";
const char* wpa_root_iface = "fi.w1.wpa_supplicant1";
// Global state shared by the functions below and released by cleanup()
sd_bus_error error = SD_BUS_ERROR_NULL; // error output of the method calls
sd_bus* system_bus = NULL; // connection opened in main()
sd_event* loop = NULL; // event loop obtained in main()
sd_bus_message* reply = NULL; // reply output of the method calls
/*
 * Release the global sd-bus resources (event loop, bus connection, last
 * reply and error).
 *
 * The unref functions accept NULL and always return NULL, so storing the
 * result back resets each global. That leaves no dangling pointer behind and
 * makes a second call a harmless no-op instead of a double unref.
 */
void cleanup() {
    loop = sd_event_unref(loop);
    system_bus = sd_bus_unref(system_bus);
    reply = sd_bus_message_unref(reply);
    sd_bus_error_free(&error);
}
/*
 * Report a failed call on stderr and terminate the process.
 *
 * msg:  text printed in front of the error description.
 * code: negated errno value; it is negated again before being handed to
 *       strerror().
 *
 * Never returns: exits with EXIT_FAILURE.
 */
void print_error(const char* msg, int code) {
    const char* reason = strerror(-code);

    fprintf(stderr, "%s %s\n", msg, reason);
    exit(EXIT_FAILURE);
}
const char* get_interface(const char* iface) {
int res = sd_bus_call_method(system_bus,
wpa_service,
wpa_root_obj_path,
wpa_root_iface,
"GetInterface",
&error,
&reply,
"s",
"Ifname", "s", iface,
"Driver", "s", "nl80211");
if (res < 0) {
fprintf(stderr, "(get) error response: %s\n", error.message);
return NULL;
}
const char* iface_path;
/*
* an object path was returned in reply
* this works like an iterator, if a method returns (osu), you could call message_read_basic in succession
* with arguments SD_BUS_TYPE_OBJECT_PATH, SD_BUS_TYPE_STRING, SD_BUS_TYPE_UINT32 or you could
* call sd_bus_message_read() and provides the signature + arguments in one call
* */
res = sd_bus_message_read_basic(reply, SD_BUS_TYPE_OBJECT_PATH, &iface_path);
if (res < 0) {
print_error("getIface: ", res);
return NULL;
}
return iface_path;
}
const char* create_interface(const char* iface) {
int res = sd_bus_call_method(system_bus,
wpa_service,
wpa_root_obj_path,
wpa_root_iface,
"CreateInterface",
&error,
&reply,
"a{sv}", 2, //pass array of str:variant (dbus dictionary) with 2
//entries to CreateInterface
"Ifname", "s", iface, // "s" variant parameter contains string, then pass the value
"Driver", "s", "nl80211");
if (res < 0) {
fprintf(stderr, "(create) error response: %s\n", error.message);
return NULL;
}
const char* iface_path;
res = sd_bus_message_read_basic(reply, SD_BUS_TYPE_OBJECT_PATH, &iface_path);
if (res < 0) {
print_error("createIface: ", res);
}
return iface_path;
}
int main() {
int res;
const char* iface_path;
//open connection to system bus - default either opens or reuses existing connection as necessary
res = sd_bus_default_system(&system_bus);
if (res < 0) {
print_error("open: ", res);
}
//associate connection with event loop, again default either creates or reuses existing
res = sd_event_default(&loop);
if (res < 0) {
print_error("event: ", res);
}
// get obj. path to the wireless interface on dbus so you can call methods on it
// this is a wireless interface (e.g. your wifi dongle) NOT the dbus interface
// if you don't know the interface name in advance, you will have to read the Interfaces property of
// wpa_supplicants root interface — call Get method on org.freedesktop.DBus properties interface,
// while some libraries expose some kind of get_property convenience function sd-bus does not
const char* ifaceName = "wlp32s0f3u2";
if (!(iface_path = get_interface(ifaceName))) { //substitute your wireless iface here
// sometimes the HW is present and listed in "ip l" but dbus does not reflect that, this fixes it
if (!(iface_path = create_interface(ifaceName))) {
fprintf(stderr, "can't create iface: %s" , ifaceName);
cleanup();
return EXIT_FAILURE;
}
}
/*
call methods with obj. path iface_path and dbus interface of your choice
this will likely be "fi.w1.wpa_supplicant1.Interface", register for signals etc...
you will need the following to receive those signals
*/
int runForUsec = 1000000; //usec, not msec!
sd_event_run(loop, runForUsec); //or sd_event_loop(loop) if you want to loop forever
cleanup();
printf("Finished OK\n");
return 0;
}
I apologize if the example above does not work perfectly. It is an excerpt from an old project I rewrote to C from C++ (I think it's C(-ish), compiler does not protest and you asked for C) but I can't test it as all my dongles refuse to work with my desktop right now. It should give you a general idea though.
Note that you will likely encounter several magical or semi-magical issues.
To ensure smooth developing/testing do the following:
make sure other network management applications are disabled (networkmanager, connman...)
restart the wpa_supplicant service
make sure the wireless interface is UP in ip link
Also, because it is not that well-documented right now:
You can access arrays and inner variant values by sd_bus_message_enter_container
and _exit counterpart. sd_bus_message_peek_type might come handy while doing that.
Or sd_bus_message_read_array for a homogenous array.
The below snippet works for me
if (argType == DBUS_TYPE_STRING)
{
    printf("Got string argument, extracting ...\n");
    // For a string, libdbus stores a pointer to the text inside the message
    // rather than copying it, so the variable must be a `const char *`: the
    // text must not be modified or freed, and has to be copied if it is
    // needed after the message has been unreferenced.
    const char* strBuffer = NULL;
    dbus_message_iter_get_basic(&args, &strBuffer);
    printf("Received string: \n %s \n", strBuffer);
}