CreateThread doesn't work in my code - multithreading

I wonder how to solve this problem.
I notice that CreateThread() doesn't work well in this code:
DWORD threadFunc1(LPVOID lParam)
{
int cur = (int)lParam
while(1)
{
//Job1...
//Reason
break;
}
Start(cur + 1);
Sleep(100);
}
void Start(int startNum)
{
....
CreateThread(NULL, NULL, (LPTHREAD_START_ROUTINE)threadFunc1, &startNum, 0, &dwThreadId);
...
}
void btnClicking()
{
Start(0);
}
In this code, Start() creates a thread, and that thread calls Start() again when its loop ends. The second thread that gets created never does any work. My guess is that the first thread exits and the second thread gets destroyed along with it.
What is the best way to solve this?
OS: Win 7 64bit Ultimate.
Tool: Visual Studio 2008.

It does not work because your code has bugs in it. The signature of your thread function is wrong, and you are passing the startNum value to the thread in the wrong way.
Try this instead:
DWORD WINAPI threadFunc1(LPVOID lParameter)
{
int cur = (int) lParameter;
while(1)
{
//Job1...
//Reason
break;
}
Start(cur + 1);
Sleep(100);
return 0; // a thread function must return a DWORD
}
void Start(int startNum)
{
....
HANDLE hThread = CreateThread(NULL, NULL, &threadFunc1, (LPVOID) startNum, 0, &dwThreadId);
if (hThread != NULL)
{
// ... store it somewhere or close it, otherwise you are leaking it...
}
...
}
void btnClicking()
{
Start(0);
}

Related

Win32 Events -- I need to be notified when the event is cleared

Win 7, x64, Visual Studio Community 2015, C++
I have a thread which I need to pause/unpause or terminate, which I currently do with manually-reset "run" or "kill" events. The loop in the thread pauses each time for 5000ms.
My goal is to be able to stop waiting or kill the thread while in the middle of the wait.
The problem is that, the way I currently have it set up, I need to be notified when the "run" event goes to the non-signalled state, and there is no way to do that unless I create an event with inverted polarity, which seems like a kludge. In short, I need a level-sensitive signal, not an edge-sensitive one.
Maybe the event should just toggle the run state?
This is the thread function:
DWORD WINAPI DAQ::_fakeOutFn(void *param) {
DAQ *pThis = (DAQ *)param;
const DWORD timeout = 5000;
bool running = false;
HANDLE handles[] = { pThis->hFakeTaskRunningEvent, pThis->hFakeTaskKillEvent };
do {
DWORD result = WaitForMultipleObjects(2, handles, FALSE, INFINITE);
switch (result) {
case WAIT_OBJECT_0: // Run started or continued
running = true;
pThis->outputIndex++;
if (pThis->outputIndex >= pThis->numSamples)
pThis->outputIndex = 0;
// Wait here
// Not sure how to cancel this if the TaskRunningEvent goes false during the wait
DWORD result2 = WaitForMultipleObjects(2, handles, FALSE, timeout);
// Check result2, and 'continue' the loop if hFakeTaskRunningEvent went to NON-SIGNALLED state
break;
case WAIT_OBJECT_0 + 1: // Kill requested
running = false;
break;
default:
_ASSERT_EXPR(FALSE, L"Wait error");
break;
}
} while (running);
return 0;
}
Use separate events for the running and resume states. Then you can reset the resume event to pause, and signal the event to resume. The running event should be used to let the thread know when it has work to do, not when it should pause that work for a period of time.
DWORD WINAPI DAQ::_fakeOutFn(void *param)
{
DAQ *pThis = (DAQ *)param;
bool running = false;
HANDLE handles[] = { pThis->hFakeTaskRunningEvent, pThis->hFakeTaskKillEvent };
do
{
DWORD result = WaitForMultipleObjects(2, handles, FALSE, INFINITE);
switch (result)
{
case WAIT_OBJECT_0: // Run started
{
running = true;
pThis->outputIndex++;
if (pThis->outputIndex >= pThis->numSamples)
pThis->outputIndex = 0;
// check for pause here
HANDLE handles2[] = { pThis->hFakeTaskResumeEvent, pThis->hFakeTaskKillEvent };
DWORD result2 = WaitForMultipleObjects(2, handles2, FALSE, INFINITE);
switch (result2)
{
case WAIT_OBJECT_0: // Resumed
break;
case WAIT_OBJECT_0 + 1: // Kill requested
running = false;
break;
default:
_ASSERT_EXPR(FALSE, L"Wait error");
break;
}
if (!running) break;
// continue working...
break;
}
case WAIT_OBJECT_0 + 1: // Kill requested
running = false;
break;
default:
_ASSERT_EXPR(FALSE, L"Wait error");
break;
}
}
while (running);
return 0;
}
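For completeness, the controlling thread would drive the worker with those three events roughly like this (a sketch only; where the handles live and the manual-reset/initial-state choices are assumptions, not part of the answer):
// Creation (e.g. in the DAQ constructor): all three as manual-reset events.
hFakeTaskRunningEvent = CreateEvent(NULL, TRUE, FALSE, NULL); // not signalled: no work yet
hFakeTaskResumeEvent  = CreateEvent(NULL, TRUE, TRUE,  NULL); // signalled: not paused
hFakeTaskKillEvent    = CreateEvent(NULL, TRUE, FALSE, NULL);
// Control from the UI thread:
SetEvent(hFakeTaskRunningEvent);   // give the worker work to do
ResetEvent(hFakeTaskResumeEvent);  // pause: the inner wait blocks on the resume event
SetEvent(hFakeTaskResumeEvent);    // resume
SetEvent(hFakeTaskKillEvent);      // kill: either wait returns WAIT_OBJECT_0 + 1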
Here I would not use events, but instead queue APCs to the thread carrying a 'command' (run, pause, exit). However, one would need to know more about the task to pick the best solution. Are you writing a service?
struct DAQ
{
HANDLE _hEvent;
enum STATE {
running,
paused,
exit
} _state;
DAQ()
{
_hEvent = 0;
}
~DAQ()
{
if (_hEvent)
{
ZwClose(_hEvent);
}
}
NTSTATUS Init()
{
return ZwCreateEvent(&_hEvent, EVENT_ALL_ACCESS, 0, NotificationEvent, FALSE);
}
void Close()
{
if (HANDLE hEvent = InterlockedExchangePointer(&_hEvent, 0))
{
ZwClose(hEvent);
}
}
DWORD fakeOutFn()
{
DbgPrint("running\n");
_state = running;
ZwSetEvent(_hEvent, 0);
static LARGE_INTEGER Interval = { 0, MINLONG };
do ; while (0 <= ZwDelayExecution(TRUE, &Interval) && _state != exit);
DbgPrint("exit\n");
return 0;
}
static DWORD WINAPI _fakeOutFn(PVOID pThis)
{
return ((DAQ*)pThis)->fakeOutFn();
}
void OnApc(STATE state)
{
_state = state;
static PCSTR stateName[] = { "running", "paused" };
if (state < RTL_NUMBER_OF(stateName))
{
DbgPrint("%s\n", stateName[state]);
}
}
static void WINAPI _OnApc(PVOID pThis, PVOID state, PVOID)
{
((DAQ*)pThis)->OnApc((STATE)(ULONG_PTR)state);
}
};
void test()
{
DAQ d;
if (0 <= d.Init())
{
if (HANDLE hThread = CreateThread(0, 0, DAQ::_fakeOutFn, &d, 0, 0))
{
if (STATUS_SUCCESS == ZwWaitForSingleObject(d._hEvent, FALSE, 0))// need for not QueueApc too early. in case ServiceMain this event not need
{
d.Close();
int n = 5;
do
{
DAQ::STATE state;
if (--n)
{
state = (n & 1) != 0 ? DAQ::running : DAQ::paused;
}
else
{
state = DAQ::exit;
}
ZwQueueApcThread(hThread, DAQ::_OnApc, &d, (PVOID)state, 0);
} while (n);
}
ZwWaitForSingleObject(hThread, FALSE, 0);
ZwClose(hThread);
}
}
}
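If you would rather stay with documented Win32 calls than the Zw* native API, the same command-by-APC idea can be written with QueueUserAPC and an alertable wait. A minimal sketch of that variant (my own names, not the code above):
#include <windows.h>
static volatile LONG g_state = 0;          // 0 = running, 1 = paused, 2 = exit
static VOID CALLBACK OnCommand(ULONG_PTR param)
{
    // Runs in the worker thread's context when it is in an alertable wait.
    InterlockedExchange(&g_state, (LONG)param);
}
static DWORD WINAPI WorkerThread(LPVOID)
{
    for (;;)
    {
        SleepEx(100, TRUE);                // alertable: queued APCs run here
        LONG state = g_state;
        if (state == 2) break;             // exit requested
        if (state == 1) continue;          // paused: just keep waiting alertably
        // ... do one unit of work ...
    }
    return 0;
}
// Controller side:
//   HANDLE hThread = CreateThread(NULL, 0, WorkerThread, NULL, 0, NULL);
//   QueueUserAPC(OnCommand, hThread, 1);  // pause
//   QueueUserAPC(OnCommand, hThread, 0);  // resume
//   QueueUserAPC(OnCommand, hThread, 2);  // exit
//   WaitForSingleObject(hThread, INFINITE);
//   CloseHandle(hThread);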

DoModal in critical section

In a parallel loop there is a critical section. I try to show an MFC dialog with DoModal inside the critical section, but since the main thread is waiting for the parallel threads, there is no way for my dialog to show up and run its message loop. To break this dependency, I built a separate executable and run it as a process from within my parallel loop. The process shows the dialog, collects the information, and returns, and the other threads keep running.
However, my team leader insists there is a better way to do it, which I couldn't figure out after hours of searching :\
I tried a separate thread inside the parallel for. It didn't work.
I tried CWinThread (Google says it is a GUI thread :\ which didn't help).
I tried creating an exe and running it. That worked :)
int someCriticDialog()
{
#pragma omp critical (showCriticDlg)
{
CMyDialog ccc;
ccc.DoModal();
/* However the code below works
CreateProcess("someCriticDlg.exe", null, &hProcess);
WaitForSingeObject(hProcess, INFINITE);
*/
}
}
#pragma omp parallel
for (int i = 0; i < 5; i++)
someCriticDialog();
Let's say here is the problem:
void trouble_maker()
{
Sleep(10000);//application stops for 10 seconds
}
You can use PostMessage + PeekMessage + modal dialog to wait for it to finish through GUI window:
void PumpWaitingMessages()
{
MSG msg;
while (::PeekMessage(&msg, NULL, NULL, NULL, PM_NOREMOVE))
if (!AfxGetThread()->PumpMessage())
return;
}
BEGIN_MESSAGE_MAP(CMyDialog, CDialog)
ON_COMMAND(2000, OnDoSomething)
ON_COMMAND(IDCANCEL, OnCancel)
END_MESSAGE_MAP()
CMyDialog::CMyDialog(CWnd* par /*=NULL*/) : CDialog(IDD_DIALOG1, par)
{
working = false;
stop = false;
}
BOOL CMyDialog::OnInitDialog()
{
BOOL res = CDialog::OnInitDialog();
//call the function "OnDoSomething", but don't call it directly
PostMessage(WM_COMMAND, 2000, 0);
return res;
}
void CMyDialog::OnCancel()
{
if (working)
{
stop = true;
}
else
{
CDialog::OnCancel();
}
}
void CMyDialog::OnDoSomething()
{
HANDLE h = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)&trouble_maker, NULL, 0, NULL);
working = true;
for (;;)
{
if (WAIT_TIMEOUT != WaitForSingleObject(h, 100)) break;
PumpWaitingMessages();
//update progress bar or something...
if (stop)
{
//terminate if it's safe
//BOOL res = TerminateThread(h, 0);
//CloseHandle(h);
//CDialog::OnCancel();
//return;
}
}
working = false;
MessageBox("done");
}

Implementing boost::barrier in C++11

I've been trying to rid a project of every Boost reference and switch to pure C++11.
At one point, worker threads are created which wait for a barrier to give the 'go' command, do the work (spread across the N threads) and synchronize when all of them finish. The basic idea is that the main loop gives the go order (boost::barrier .wait()) and waits for the result with the same call.
In a different project I had implemented a custom Barrier modelled on the Boost version, and everything worked perfectly. The implementation is as follows:
Barrier.h:
class Barrier {
public:
Barrier(unsigned int n);
void Wait(void);
private:
std::mutex counterMutex;
std::mutex waitMutex;
unsigned int expectedN;
unsigned int currentN;
};
Barrier.cpp
Barrier::Barrier(unsigned int n) {
expectedN = n;
currentN = expectedN;
}
void Barrier::Wait(void) {
counterMutex.lock();
// If we're the first thread, we want an extra lock at our disposal
if (currentN == expectedN) {
waitMutex.lock();
}
// Decrease thread counter
--currentN;
if (currentN == 0) {
currentN = expectedN;
waitMutex.unlock();
currentN = expectedN;
counterMutex.unlock();
} else {
counterMutex.unlock();
waitMutex.lock();
waitMutex.unlock();
}
}
This code has been used on iOS and in Android's NDK without any problems, but when trying it in a Visual Studio 2013 project it seems that only the thread that locked a mutex can unlock it (assertion: unlock of unowned mutex).
Is there any non-spinning (blocking, such as this one) version of a barrier that I can use that works for C++11? I've only been able to find barriers that use busy-waiting, which I would like to avoid (unless there is really no way around it).
class Barrier {
public:
explicit Barrier(std::size_t iCount) :
mThreshold(iCount),
mCount(iCount),
mGeneration(0) {
}
void Wait() {
std::unique_lock<std::mutex> lLock{mMutex};
auto lGen = mGeneration;
if (!--mCount) {
mGeneration++;
mCount = mThreshold;
mCond.notify_all();
} else {
mCond.wait(lLock, [this, lGen] { return lGen != mGeneration; });
}
}
private:
std::mutex mMutex;
std::condition_variable mCond;
std::size_t mThreshold;
std::size_t mCount;
std::size_t mGeneration;
};
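A usage sketch for the class above (not part of the answer): because of the generation counter, the same Barrier object can be reused for every iteration, even with back-to-back waits.
#include <thread>
#include <vector>
#include <cstdio>
int main()
{
    const std::size_t nThreads = 4;
    Barrier barrier(nThreads);
    std::vector<std::thread> workers;
    for (std::size_t t = 0; t < nThreads; ++t)
    {
        workers.emplace_back([&barrier, t] {
            for (int iter = 0; iter < 3; ++iter)
            {
                // ... heavy per-thread computation for this iteration ...
                barrier.Wait();                        // everyone finished the iteration
                if (t == 0) std::printf("iteration %d done\n", iter);
                barrier.Wait();                        // wait for thread 0's bookkeeping
            }
        });
    }
    for (auto &w : workers) w.join();
}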
Use a std::condition_variable instead of a std::mutex to block all threads until the last one reaches the barrier.
class Barrier
{
private:
std::mutex _mutex;
std::condition_variable _cv;
std::size_t _count;
public:
explicit Barrier(std::size_t count) : _count(count) { }
void Wait()
{
std::unique_lock<std::mutex> lock(_mutex);
if (--_count == 0) {
_cv.notify_all();
} else {
_cv.wait(lock, [this] { return _count == 0; });
}
}
};
Here's my version of the accepted answer above with Auto reset behavior for repetitive use; this was achieved by counting up and down alternately.
/**
* @brief Represents a CPU thread barrier
* @note The barrier automatically resets after all threads are synced
*/
class Barrier
{
private:
std::mutex m_mutex;
std::condition_variable m_cv;
size_t m_count;
const size_t m_initial;
enum State : unsigned char {
Up, Down
};
State m_state;
public:
explicit Barrier(std::size_t count) : m_count{ count }, m_initial{ count }, m_state{ State::Down } { }
/// Blocks until all N threads reach here
void Sync()
{
std::unique_lock<std::mutex> lock{ m_mutex };
if (m_state == State::Down)
{
// Counting down the number of syncing threads
if (--m_count == 0) {
m_state = State::Up;
m_cv.notify_all();
}
else {
m_cv.wait(lock, [this] { return m_state == State::Up; });
}
}
else // (m_state == State::Up)
{
// Counting back up for Auto reset
if (++m_count == m_initial) {
m_state = State::Down;
m_cv.notify_all();
}
else {
m_cv.wait(lock, [this] { return m_state == State::Down; });
}
}
}
};
It seems the answers above don't work when two barrier waits are placed very close together.
For example, each thread runs a while loop that looks like this:
while (true)
{
threadBarrier->Synch();
// do heavy computation
threadBarrier->Synch();
// small external calculations like timing, loop count, etc, ...
}
And here is the solution using STL:
class ThreadBarrier
{
public:
int m_threadCount = 0;
int m_currentThreadCount = 0;
std::mutex m_mutex;
std::condition_variable m_cv;
public:
inline ThreadBarrier(int threadCount)
{
m_threadCount = threadCount;
};
public:
inline void Synch()
{
bool wait = false;
m_mutex.lock();
m_currentThreadCount = (m_currentThreadCount + 1) % m_threadCount;
wait = (m_currentThreadCount != 0);
m_mutex.unlock();
if (wait)
{
std::unique_lock<std::mutex> lk(m_mutex);
m_cv.wait(lk);
}
else
{
m_cv.notify_all();
}
};
};
And the solution for Windows:
class ThreadBarrier
{
public:
SYNCHRONIZATION_BARRIER m_barrier;
public:
inline ThreadBarrier(int threadCount)
{
InitializeSynchronizationBarrier(
&m_barrier,
threadCount,
8000);
};
public:
inline void Synch()
{
EnterSynchronizationBarrier(
&m_barrier,
0);
};
};
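Usage is the same for both ThreadBarrier versions: construct it with the number of participating threads and call Synch() at each rendezvous point. Note that InitializeSynchronizationBarrier/EnterSynchronizationBarrier are only available on Windows 8 and later, so the second version won't work on older targets. A quick sketch (keepRunning is a placeholder):
ThreadBarrier barrier(4);                 // 4 participating threads
// Inside each of the 4 worker threads:
while (keepRunning)
{
    barrier.Synch();                      // all threads start the step together
    // ... heavy computation ...
    barrier.Synch();                      // all threads have finished the step
    // ... small external calculations: timing, loop count, ...
}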

Multithreaded program becomes nonresponsive on _multiple_ CPUs, but fine on a single CPU (when updating a ListView)

Update: I've reproduced the problem! Scroll lower to see the code.
Quick Notes
My Core i5 CPU has 2 cores, hyperthreading.
If I call SetProcessAffinityMask(GetCurrentProcess(), 1), everything is fine, even though the program is still multithreaded.
If I don't do that, and the program is running on Windows XP (it's fine on Windows 7 x64!), my GUI starts locking up for several seconds while I'm scrolling the list view and the icons are loading.
The Problem
Basically, when I run the program posted below (a reduced version of my original code) on Windows XP (Windows 7 is fine), unless I force the same logical CPU for all my threads, the program UI starts lagging behind by half a second or so.
(Note: Lots of edits to this post here, as I investigated the problem further.)
Note that the number of threads is the same -- only the affinity mask is different.
I've tried this out using two different methods of message-passing: the built-in GetMessage as well as my own BackgroundWorker.
The result? BackgroundWorker benefits from affinity to 1 logical CPU (virtually no lag), whereas GetMessage is completely hurt by it (the lag is now many seconds long).
I can't figure out why that would be happening -- shouldn't multiple CPUs work better than a single CPU?!
Why would there be such a lag, when the number of threads is the same?
More stats:
GetLogicalProcessorInformation returns:
0x0: {ProcessorMask=0x0000000000000003 Relationship=RelationProcessorCore ...}
0x1: {ProcessorMask=0x0000000000000003 Relationship=RelationCache ...}
0x2: {ProcessorMask=0x0000000000000003 Relationship=RelationCache ...}
0x3: {ProcessorMask=0x0000000000000003 Relationship=RelationCache ...}
0x4: {ProcessorMask=0x000000000000000f Relationship=RelationProcessorPackage ...}
0x5: {ProcessorMask=0x000000000000000c Relationship=RelationProcessorCore ...}
0x6: {ProcessorMask=0x000000000000000c Relationship=RelationCache ...}
0x7: {ProcessorMask=0x000000000000000c Relationship=RelationCache ...}
0x8: {ProcessorMask=0x000000000000000c Relationship=RelationCache ...}
0x9: {ProcessorMask=0x000000000000000f Relationship=RelationCache ...}
0xa: {ProcessorMask=0x000000000000000f Relationship=RelationNumaNode ...}
The Code
The code below should show this problem on Windows XP SP3.
(At least, it does on my computer!)
Compare these two:
Run the program normally, then scroll. You should see lag.
Run the program with the affinity command-line argument, then scroll. It should be almost completely smooth.
Why would this happen?
#define _WIN32_WINNT 0x502
#include <tchar.h>
#include <Windows.h>
#include <CommCtrl.h>
#pragma comment(lib, "kernel32.lib")
#pragma comment(lib, "comctl32.lib")
#pragma comment(lib, "user32.lib")
LONGLONG startTick = 0;
LONGLONG QPC()
{ LARGE_INTEGER v; QueryPerformanceCounter(&v); return v.QuadPart; }
LONGLONG QPF()
{ LARGE_INTEGER v; QueryPerformanceFrequency(&v); return v.QuadPart; }
bool logging = false;
bool const useWindowMessaging = true; // GetMessage() or BackgroundWorker?
bool const autoScroll = false; // for testing
class BackgroundWorker
{
struct Thunk
{
virtual void operator()() = 0;
virtual ~Thunk() { }
};
class CSLock
{
CRITICAL_SECTION& cs;
public:
CSLock(CRITICAL_SECTION& criticalSection)
: cs(criticalSection)
{ EnterCriticalSection(&this->cs); }
~CSLock() { LeaveCriticalSection(&this->cs); }
};
template<typename T>
class ScopedPtr
{
T *p;
ScopedPtr(ScopedPtr const &) { }
ScopedPtr &operator =(ScopedPtr const &) { }
public:
ScopedPtr() : p(NULL) { }
explicit ScopedPtr(T *p) : p(p) { }
~ScopedPtr() { delete p; }
T *operator ->() { return p; }
T &operator *() { return *p; }
ScopedPtr &operator =(T *p)
{
if (this->p != NULL) { __debugbreak(); }
this->p = p;
return *this;
}
operator T *const &() { return this->p; }
};
Thunk **const todo;
size_t nToDo;
CRITICAL_SECTION criticalSection;
DWORD tid;
HANDLE hThread, hSemaphore;
volatile bool stop;
static size_t const MAX_TASKS = 1 << 18; // big enough for testing
static DWORD CALLBACK entry(void *arg)
{ return ((BackgroundWorker *)arg)->process(); }
public:
BackgroundWorker()
: nToDo(0), todo(new Thunk *[MAX_TASKS]), stop(false), tid(0),
hSemaphore(CreateSemaphore(NULL, 0, 1 << 30, NULL)),
hThread(CreateThread(NULL, 0, entry, this, CREATE_SUSPENDED, &tid))
{
InitializeCriticalSection(&this->criticalSection);
ResumeThread(this->hThread);
}
~BackgroundWorker()
{
// Clear all the tasks
this->stop = true;
this->clear();
LONG prev;
if (!ReleaseSemaphore(this->hSemaphore, 1, &prev) ||
WaitForSingleObject(this->hThread, INFINITE) != WAIT_OBJECT_0)
{ __debugbreak(); }
CloseHandle(this->hSemaphore);
CloseHandle(this->hThread);
DeleteCriticalSection(&this->criticalSection);
delete [] this->todo;
}
void clear()
{
CSLock lock(this->criticalSection);
while (this->nToDo > 0)
{
delete this->todo[--this->nToDo];
}
}
unsigned int process()
{
DWORD result;
while ((result = WaitForSingleObject(this->hSemaphore, INFINITE))
== WAIT_OBJECT_0)
{
if (this->stop) { result = ERROR_CANCELLED; break; }
ScopedPtr<Thunk> next;
{
CSLock lock(this->criticalSection);
if (this->nToDo > 0)
{
next = this->todo[--this->nToDo];
this->todo[this->nToDo] = NULL; // for debugging
}
}
if (next) { (*next)(); }
}
return result;
}
template<typename Func>
void add(Func const &func)
{
CSLock lock(this->criticalSection);
struct FThunk : public virtual Thunk
{
Func func;
FThunk(Func const &func) : func(func) { }
void operator()() { this->func(); }
};
DWORD exitCode;
if (GetExitCodeThread(this->hThread, &exitCode) &&
exitCode == STILL_ACTIVE)
{
if (this->nToDo >= MAX_TASKS) { __debugbreak(); /*too many*/ }
if (this->todo[this->nToDo] != NULL) { __debugbreak(); }
this->todo[this->nToDo++] = new FThunk(func);
LONG prev;
if (!ReleaseSemaphore(this->hSemaphore, 1, &prev))
{ __debugbreak(); }
}
else { __debugbreak(); }
}
};
LRESULT CALLBACK MyWindowProc(
HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM lParam)
{
enum { IDC_LISTVIEW = 101 };
switch (uMsg)
{
case WM_CREATE:
{
RECT rc; GetClientRect(hWnd, &rc);
HWND const hWndListView = CreateWindowEx(
WS_EX_CLIENTEDGE, WC_LISTVIEW, NULL,
WS_CHILDWINDOW | WS_VISIBLE | LVS_REPORT |
LVS_SHOWSELALWAYS | LVS_SINGLESEL | WS_TABSTOP,
rc.left, rc.top, rc.right - rc.left, rc.bottom - rc.top,
hWnd, (HMENU)IDC_LISTVIEW, NULL, NULL);
int const cx = GetSystemMetrics(SM_CXSMICON),
cy = GetSystemMetrics(SM_CYSMICON);
HIMAGELIST const hImgList =
ImageList_Create(
GetSystemMetrics(SM_CXSMICON),
GetSystemMetrics(SM_CYSMICON),
ILC_COLOR32, 1024, 1024);
ImageList_AddIcon(hImgList, (HICON)LoadImage(
NULL, IDI_INFORMATION, IMAGE_ICON, cx, cy, LR_SHARED));
LVCOLUMN col = { LVCF_TEXT | LVCF_WIDTH, 0, 500, TEXT("Name") };
ListView_InsertColumn(hWndListView, 0, &col);
ListView_SetExtendedListViewStyle(hWndListView,
LVS_EX_DOUBLEBUFFER | LVS_EX_FULLROWSELECT | LVS_EX_GRIDLINES);
ListView_SetImageList(hWndListView, hImgList, LVSIL_SMALL);
for (int i = 0; i < (1 << 11); i++)
{
TCHAR text[128]; _stprintf(text, _T("Item %d"), i);
LVITEM item =
{
LVIF_IMAGE | LVIF_TEXT, i, 0, 0, 0,
text, 0, I_IMAGECALLBACK
};
ListView_InsertItem(hWndListView, &item);
}
if (autoScroll)
{
SetTimer(hWnd, 0, 1, NULL);
}
break;
}
case WM_TIMER:
{
HWND const hWndListView = GetDlgItem(hWnd, IDC_LISTVIEW);
RECT rc; GetClientRect(hWndListView, &rc);
if (!ListView_Scroll(hWndListView, 0, rc.bottom - rc.top))
{
KillTimer(hWnd, 0);
}
break;
}
case WM_NULL:
{
HWND const hWndListView = GetDlgItem(hWnd, IDC_LISTVIEW);
int const iItem = (int)lParam;
if (logging)
{
_tprintf(_T("#%I64lld ms:")
_T(" Received: #%d\n"),
(QPC() - startTick) * 1000 / QPF(), iItem);
}
int const iImage = 0;
LVITEM const item = {LVIF_IMAGE, iItem, 0, 0, 0, NULL, 0, iImage};
ListView_SetItem(hWndListView, &item);
ListView_Update(hWndListView, iItem);
break;
}
case WM_NOTIFY:
{
LPNMHDR const pNMHDR = (LPNMHDR)lParam;
switch (pNMHDR->code)
{
case LVN_GETDISPINFO:
{
NMLVDISPINFO *const pInfo = (NMLVDISPINFO *)lParam;
struct Callback
{
HWND hWnd;
int iItem;
void operator()()
{
if (logging)
{
_tprintf(_T("#%I64lld ms: Sent: #%d\n"),
(QPC() - startTick) * 1000 / QPF(),
iItem);
}
PostMessage(hWnd, WM_NULL, 0, iItem);
}
};
if (pInfo->item.iImage == I_IMAGECALLBACK)
{
if (useWindowMessaging)
{
DWORD const tid =
(DWORD)GetWindowLongPtr(hWnd, GWLP_USERDATA);
PostThreadMessage(
tid, WM_NULL, 0, pInfo->item.iItem);
}
else
{
Callback callback = { hWnd, pInfo->item.iItem };
if (logging)
{
_tprintf(_T("#%I64lld ms: Queued: #%d\n"),
(QPC() - startTick) * 1000 / QPF(),
pInfo->item.iItem);
}
((BackgroundWorker *)
GetWindowLongPtr(hWnd, GWLP_USERDATA))
->add(callback);
}
}
break;
}
}
break;
}
case WM_CLOSE:
{
PostQuitMessage(0);
break;
}
}
return DefWindowProc(hWnd, uMsg, wParam, lParam);
}
DWORD WINAPI BackgroundWorkerThread(LPVOID lpParameter)
{
HWND const hWnd = (HWND)lpParameter;
MSG msg;
while (GetMessage(&msg, NULL, 0, 0) > 0 && msg.message != WM_QUIT)
{
if (msg.message == WM_NULL)
{
PostMessage(hWnd, msg.message, msg.wParam, msg.lParam);
}
}
return 0;
}
int _tmain(int argc, LPTSTR argv[])
{
startTick = QPC();
bool const affinity = argc >= 2 && _tcsicmp(argv[1], _T("affinity")) == 0;
if (affinity)
{ SetProcessAffinityMask(GetCurrentProcess(), 1 << 0); }
bool const log = logging; // disable temporarily
logging = false;
WNDCLASS wndClass =
{
0, &MyWindowProc, 0, 0, NULL, NULL, LoadCursor(NULL, IDC_ARROW),
GetSysColorBrush(COLOR_3DFACE), NULL, TEXT("MyClass")
};
HWND const hWnd = CreateWindow(
MAKEINTATOM(RegisterClass(&wndClass)),
affinity ? TEXT("Window (1 CPU)") : TEXT("Window (All CPUs)"),
WS_OVERLAPPEDWINDOW | WS_VISIBLE, CW_USEDEFAULT, CW_USEDEFAULT,
CW_USEDEFAULT, CW_USEDEFAULT, NULL, NULL, NULL, NULL);
BackgroundWorker iconLoader;
DWORD tid = 0;
if (useWindowMessaging)
{
CreateThread(NULL, 0, &BackgroundWorkerThread, (LPVOID)hWnd, 0, &tid);
SetWindowLongPtr(hWnd, GWLP_USERDATA, tid);
}
else { SetWindowLongPtr(hWnd, GWLP_USERDATA, (LONG_PTR)&iconLoader); }
MSG msg;
while (GetMessage(&msg, NULL, 0, 0) > 0)
{
if (!IsDialogMessage(hWnd, &msg))
{
TranslateMessage(&msg);
DispatchMessage(&msg);
}
if (msg.message == WM_TIMER ||
!PeekMessage(&msg, NULL, 0, 0, PM_NOREMOVE))
{ logging = log; }
}
PostThreadMessage(tid, WM_QUIT, 0, 0);
return 0;
}
Based on the inter-thread timings you posted at http://ideone.com/fa2fM, it looks like there is a fairness issue at play here. Based solely on this assumption, here is my reasoning as to the apparent cause of the perceived lag and a potential solution to the problem.
It looks like there is a large number of LVN_GETDISPINFO messages being generated and processed on one thread by the window proc, and while the background worker thread is able to keep up and post messages back to the window at the same rate, the WM_NULL messages it posts are so far back in the queue that it takes time before they get handled.
When you set the processor affinity mask, you introduce more fairness into the system because the same processor must service both threads, which will limit the rate at which LVN_GETDISPINFO messages are generated relative to the non-affinity case. This means that the window proc message queue is likely not as deep when you post your WM_NULL messages, which in turn means that they will be processed 'sooner'.
It seems that you need to somehow bypass the queueing effect. Using SendMessage, SendMessageCallback or SendNotifyMessage instead of PostMessage may be ways to do this. In the SendMessage case, your worker thread will block until the window proc thread is finished its current message and processes the sent WM_NULL message, but you will be able to inject your WM_NULL messages more evenly into the message processing flow. See this page for an explanation of queued vs. non-queued message handling.
If you choose to use SendMessage, but you don't want to limit the rate at which you can obtain icons due to the blocking nature of SendMessage, then you can use a third thread. Your I/O thread would post messages to the third thread, while the third thread uses SendMessage to inject icon updates into the UI thread. In this fashion, you have control of the queue of satisfied icon requests, instead of interleaving them into the window proc message queue.
As for the difference in behaviour between Win7 and WinXP, there may be a number of reasons why you don't appear to see this effect on Win7. It could be that the list view common control is implemented differently and limits the rate at which LVN_GETDISPINFO messages are generated. Or perhaps the thread scheduling mechanism in Win7 switches thread contexts more frequently or more fairly.
EDIT:
Based on your latest change, try the following:
...
struct Callback
{
HWND hWnd;
int iItem;
void operator()()
{
if (logging)
{
_tprintf(_T("#%I64lld ms: Sent: #%d\n"),
(QPC() - startTick) * 1000 / QPF(),
iItem);
}
SendNotifyMessage(hWnd, WM_NULL, 0, iItem); // <----
}
};
...
DWORD WINAPI BackgroundWorkerThread(LPVOID lpParameter)
{
HWND const hWnd = (HWND)lpParameter;
MSG msg;
while (GetMessage(&msg, NULL, 0, 0) > 0 && msg.message != WM_QUIT)
{
if (msg.message == WM_NULL)
{
SendNotifyMessage(hWnd, msg.message, msg.wParam, msg.lParam); // <----
}
}
return 0;
}
EDIT 2:
After establishing that the LVN_GETDISPINFO messages are being placed in the queue using SendMessage instead of PostMessage, we can't use SendMessage ourselves to bypass them.
Still proceeding on the assumption that there is a glut of messages being processed by the wndproc before the icon results are being sent back from the worker thread, we need another way to get those updates handled as soon as they are ready.
Here's the idea:
Worker thread places results in a synchronized queue-like data structure, and then posts (using PostMessage) a WM_NULL message to the wndproc (to ensure that the wndproc gets executed sometime in the future).
At the top of the wndproc (before the case statements), the UI thread checks the synchronized queue-like data structure to see if there are any results, and if so, removes one or more results from the queue-like data structure and processes them.
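A sketch of that idea (my own helper names, not the poster's code; it assumes <vector> is available in the existing program): the worker pushes finished items into a lock-protected container and posts WM_NULL purely as a wake-up, and the window procedure drains the container before its switch.
CRITICAL_SECTION g_resultsLock;           // InitializeCriticalSection() once at startup
std::vector<int> g_readyItems;            // items whose icons are ready
// Worker thread, after an icon has been loaded:
void PublishResult(HWND hWnd, int iItem)
{
    EnterCriticalSection(&g_resultsLock);
    g_readyItems.push_back(iItem);
    LeaveCriticalSection(&g_resultsLock);
    PostMessage(hWnd, WM_NULL, 0, 0);     // only a wake-up; the data travels via the queue
}
// Called at the top of MyWindowProc, before the switch (uMsg):
void DrainResults(HWND hWnd)
{
    std::vector<int> ready;
    EnterCriticalSection(&g_resultsLock);
    ready.swap(g_readyItems);
    LeaveCriticalSection(&g_resultsLock);
    HWND const hWndListView = GetDlgItem(hWnd, 101 /* IDC_LISTVIEW */);
    for (size_t i = 0; i < ready.size(); ++i)
    {
        LVITEM const item = { LVIF_IMAGE, ready[i], 0, 0, 0, NULL, 0, 0 /* iImage */ };
        ListView_SetItem(hWndListView, &item);
    }
}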
The issue has less to do with thread affinity and more to do with the list view being told to redraw, on every update, every item whose image is still pending. Because you do not add the LVIF_DI_SETITEM flag to pInfo->item.mask in your LVN_GETDISPINFO handler, and because you call ListView_Update manually, each call to ListView_Update makes the list view invalidate any item that still has its iImage set to I_IMAGECALLBACK.
You can fix this in one of two ways (or a combination of both):
Remove ListView_Update from your WM_NULL handler. The list view will automatically redraw the items you set the image for in your WM_NULL handler when you set them, and it will not attempt to redraw items you haven't set the image for more than once.
Set LVIF_DI_SETITEM flag in pInfo->item.mask in your LVN_GETDISPINFO handler and set pInfo->item.iImage to a value that is not I_IMAGECALLBACK.
I repro'd similar awful behavior doing a full page scroll on Vista. Doing either of the above fixed the issue while still updating the icons asynchronously.
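For example, option 2 might look roughly like this inside the existing LVN_GETDISPINFO handler (a sketch; it assumes image index 0 is an acceptable value to store until the real icon has been loaded):
case LVN_GETDISPINFO:
{
    NMLVDISPINFO *const pInfo = (NMLVDISPINFO *)lParam;
    if (pInfo->item.iImage == I_IMAGECALLBACK)
    {
        pInfo->item.iImage = 0;                 // placeholder until the real icon arrives
        pInfo->item.mask |= LVIF_DI_SETITEM;    // tell the list view to store the value,
                                                // so it stops re-asking for (and
                                                // re-invalidating) this item
        // ... queue the real icon load as before ...
    }
    break;
}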
It's plausible that this is related to XP's hyperthreading/logical-core scheduling, and I second IvoTops' suggestion to try this with hyperthreading disabled. Please try it and let us know.
Why? Because:
a) Logical cores offer poor parallelism for CPU-bound tasks. Running multiple CPU-bound threads on two logical HT cores of the same physical core is detrimental to performance. See, for example, this Intel paper - it explains how enabling HT can cause typical server threads to incur increased latency or processing time per request (while improving net throughput).
b) Windows 7 does have some HT/SMT (simultaneous multithreading) scheduling improvements. Mark Russinovich's slides here mention this briefly. Although they claim that the XP scheduler is SMT-aware, the fact that Windows 7 explicitly fixes something in this area implies there could be something lacking in XP. So I'm guessing the OS isn't setting the thread affinity to the second core appropriately (perhaps because the second core isn't idle at the instant your second thread is scheduled, to speculate wildly).
You wrote "I just tried setting the CPU affinity of the process (or even the individual threads) to all potential combinations I could think of, on the same and on different logical CPUs".
Can we try to verify that the execution is actually on the second core, once you set this?
You can visually check this in task manager or perfmon/perf counters
Maybe post the code where you set the affinity of the threads (I note that you are not checking the return value of SetProcessAffinityMask; do check that as well).
If Windows perf counters don't help, Intel's VTune Performance Analyzer is helpful for exactly this kind of thing.
I think you can force the thread affinity manually using task manager.
One more thing: your Core i5 is either the Nehalem or the Sandy Bridge microarchitecture. The HT implementation in Nehalem and later is significantly different from prior-generation architectures (Core, etc.). In fact, Microsoft recommended disabling HT for running BizTalk Server on pre-Nehalem systems. So perhaps Windows XP does not handle the new HT architecture well.
This might be a hyperthreading bug. To check whether that's what's causing it, run your faulty program with hyperthreading turned off (you can usually switch it off in the BIOS). I have run into two issues in the last five years that only surfaced when hyperthreading was enabled.

Starting a thread in a Windows CE 6.0 service

I am developing a Windows service under Windows CE 6.0. The project produces a DLL, which gets integrated into the OS image. The service is started when WinCE boots, thanks to registry settings.
The problem is that I am not able to start the "Thread1" thread. I should see the MessageBox, but nothing appears on the screen. Why? Putting the MessageBox into SFC_Init works fine.
Another thing - when I type "services list" in the console (on the WinCE system), the state of my service is "unknown"... Why is that?
Please help!
// SrvForCom.cpp : Defines the entry point for the DLL application.
//
#include "stdafx.h"
HINSTANCE hInst;
// main entry point of the DLL
BOOL WINAPI DllMain( HANDLE hModule,
DWORD ul_reason_for_call,
LPVOID lpReserved
)
{
hInst = (HINSTANCE)hModule;
switch(ul_reason_for_call) {
case DLL_PROCESS_ATTACH:
DisableThreadLibraryCalls((HMODULE)hModule);
break;
case DLL_PROCESS_DETACH:
break;
}
return TRUE;
}
// function called during initialization process
DWORD SFC_Init(DWORD dwContext) {
PSRVCONTEXT pSrv;
HANDLE hThrd;
DWORD err = ERROR_INVALID_PARAMETER;
// fill the info structure
pSrv->servState = SERVICE_STATE_UNKNOWN;
switch (dwContext) {
case SERVICE_INIT_STARTED:
pSrv->servState = SERVICE_STATE_ON;
break;
case SERVICE_INIT_STOPPED:
pSrv->servState = SERVICE_STATE_OFF;
break;
}
// start new thread
hThrd = CreateThread (NULL, 0, Thread1, NULL, 0, NULL);
if (hThrd) {
CloseHandle (hThrd);
err = 0;
} else {
err = GetLastError();
}
return hThrd;
}
BOOL SFC_Deinit(DWORD dwData) {
return TRUE;
}
BOOL SFC_Open(DWORD dwData, DWORD dwAccess, DWORD dwShareMode) {
PSRVCONTEXT pSrv = (PSRVCONTEXT)dwData;
return (DWORD)pSrv;
}
BOOL SFC_Close(DWORD dwData) {
return 1;
}
BOOL SFC_IOControl(DWORD dwData, DWORD dwCode, PBYTE pBufIn,
DWORD dwLenIn, PBYTE pBufOut, DWORD dwLenOut,
PDWORD pdwActualOut) {
PSRVCONTEXT pSrv = (PSRVCONTEXT)dwData;
switch (dwCode) {
case IOCTL_SERVICE_STATUS:
*pBufOut = pSrv->servState;
break;
}
return 1;
}
DWORD WINAPI Thread1(LPVOID lpv) {
MessageBox (NULL, TEXT ("The thread has been successfully started!"), TEXT ("Info"), MB_OK);
return 0;
}
I have found the answer to this question. The code above is correct apart from one detail - an uninitialized structure. "pSrv" is a pointer (type PSRVCONTEXT) to a struct SRVCONTEXT with a "servState" field. When executing "pSrv->servState = SERVICE_STATE_UNKNOWN;", some memory was being overwritten, causing errors in the application.
The solution is to allocate memory for this structure before using it:
pSrv = (PSRVCONTEXT)LocalAlloc (LPTR, sizeof (SRVCONTEXT));
After adding the above line to my application, everything started working. The thread starts properly and the service's state is reported as "Running".
P.S. The structure looks like this:
typedef struct {
DWORD servState;
} SRVCONTEXT, *PSRVCONTEXT;
