why does pthread_cond_timedwait not trigger after indicated time-limit? - linux

This is supposed to work in a loop (server) and delegate work/inquiry to a faulty library, here represented by the longrun() function call, to a thread with a time-out of tmax=3s. I placed synchronization vars and i am trying to wait for no more than this limit, but when longrun() hangs (run 4), it still waits the full time (7s) instead of the requested limit. Can anyone explain?
#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <pthread.h>
#include <sys/time.h>
#include <iostream>
using namespace std;
string int2str(int i){
char buf[10]; // no larger int passed we hope
int end = sprintf(buf, "%d", i);
buf[end] = '\0';
return string(buf);
}
string longrun(int qi){
if(qi % 4 == 0) {
sleep(7);
return string("'---- to: ") + int2str(qi) + string("' (hang case)");
}
else {
sleep(1);
return string("'okay to: ") + int2str(qi) + string("'");
}
}
struct tpack_t { // thread pack
pthread_t thread;
pthread_mutex_t mutex;
pthread_cond_t go; // have a new value to run
pthread_cond_t ready; // tell main thread we're done processing
int newq; // predicate on go+ready condition for wait
int qi; // place question as int to thread: question-int
string res; // where i place the response
tpack_t();
};
tpack_t::tpack_t() {
pthread_mutex_init (&mutex, NULL);
pthread_cond_init (&go, NULL);
pthread_cond_init (&ready, NULL);
newq = 0;
}
void set_cond_time(timespec* ctp, int tmax){
timeval now;
gettimeofday(&now, NULL);
ctp->tv_nsec = now.tv_usec * 1000UL;
ctp->tv_sec = now.tv_sec + tmax; // now + max time!
printf("[m] ... set to sleep for %d sec, i hope...\n", tmax);
}
void take_faulty_evasive_action(tpack_t* tpx){
// basically kill thread, clean faulty library copy (that file) and restart it
cout << "will work on it (restarting thread) soon!\n";
tpx->newq = 0; // minimal action for now...
}
void* faulty_proc(void* arg){
tpack_t* tpx = (tpack_t*) arg;
while(true){
pthread_mutex_lock(&tpx->mutex);
while(tpx->newq == 0){
pthread_cond_wait(&tpx->go, &tpx->mutex);
}
printf("[t] to process : %d\n", tpx->qi); fflush(stdout);
// now i have a new value in qi, process it and place the answer in... res
tpx->res = longrun(tpx->qi);
tpx->newq = 0;
pthread_mutex_unlock(&tpx->mutex);
pthread_cond_signal(&tpx->ready);
}
}
int main(int argc, char* argv[]){
cout << "\n this presents the problem: idx = 4k -> hang case ...\n ( challenge is to eliminate them by killing thread and restarting it )\n\n";
printf(" ETIMEDOUT = %d EINVAL = %d EPERM = %d\n\n", ETIMEDOUT, EINVAL, EPERM);
tpack_t* tpx = new tpack_t();
pthread_create(&tpx->thread, NULL, &faulty_proc, (void*) tpx);
// max wait time; more than that is a hanging indication!
int numproc = 5;
++numproc;
int tmax = 3;
timespec cond_time;
cond_time.tv_nsec = 0;
int status, expired; // for timed wait on done condition!
time_t t0 = time(NULL);
for(int i=1; i<numproc; ++i){
expired = 0;
pthread_mutex_lock(&tpx->mutex);
tpx->qi = i; // init the question
tpx->newq = 1; // ... predicate
//pthread_mutex_unlock(&tpx->mutex);
pthread_cond_signal(&tpx->go); // let it know that...
while(tpx->newq == 1){
/// ---------------------- most amazing region, timedwait waits all the way! ----------------------
set_cond_time(&cond_time, tmax); // time must be FROM NOW! (abs time, not interval)
time_t wt0 = time(NULL);
status = pthread_cond_timedwait(&tpx->ready, &tpx->mutex, &cond_time);
printf("[m] ---- \t exited with status = %d (after %.2fs)\n", status, difftime(time(NULL), wt0));
/// -----------------------------------------------------------------------------------------------
if (status == ETIMEDOUT){
printf("\t ['t was and newq == %d]\n", tpx->newq);
if(tpx->newq == 1){ // check one more time, to elim race possibility
expired = 1;
break;
}
}
else if(status != 0){
fprintf(stderr, "cond timewait for faulty to reply errored out\n");
return 1;
}
}
if(expired){
take_faulty_evasive_action(tpx); // kill thread, start new one, report failure below
cout << "[m] :: interruption: default bad answer goes here for " << i << "\n\n";
}
else {
cout << "[m] :: end with ans: " << tpx->res << endl << endl;
}
pthread_mutex_unlock(&tpx->mutex);
}
time_t t1 = time(NULL);
printf("took %.2f sec to run\n", difftime(t1, t0));
}
Used 'g++ -pthread code.cc' to compile under linux (ubuntu 16.04). Output is:
this presents the problem: idx = 4k -> hang case ...
( challenge is to eliminate them by killing thread and restarting it )
ETIMEDOUT = 110 EINVAL = 22 EPERM = 1
[m] ... set to sleep for 3 sec, i hope...
[t] to process : 1
[m] ---- exited with status = 0 (after 1.00s)
[m] :: end with ans: 'okay to: 1'
[m] ... set to sleep for 3 sec, i hope...
[t] to process : 2
[m] ---- exited with status = 0 (after 1.00s)
[m] :: end with ans: 'okay to: 2'
[m] ... set to sleep for 3 sec, i hope...
[t] to process : 3
[m] ---- exited with status = 0 (after 1.00s)
[m] :: end with ans: 'okay to: 3'
[m] ... set to sleep for 3 sec, i hope...
[t] to process : 4
[m] ---- exited with status = 110 (after 7.00s)
['t was and newq == 0]
[m] :: end with ans: '---- to: 4' (hang case)
[m] ... set to sleep for 3 sec, i hope...
[t] to process : 5
[m] ---- exited with status = 0 (after 1.00s)
[m] :: end with ans: 'okay to: 5'
took 11.00 sec to run

The problem is that faulty_proc() keeps tpx->mutex locked while it calls longrun(), and the pthread_cond_timedwait() call in main() can't return until it can re-acquire the mutex, even if the timeout expires.
If longrun() doesn't need the mutex to be locked - and that seems to be the case - you can unlock the mutex around that call and re-lock it before setting the completion flag and signalling the condition variable.

Related

synchronize 3 threads to print sequential output

I was asked this question in an interview. I was pretty clueless.
So I decided to learn some multithreading and hopefully find an answer to this question.
I need to use 3 threads to print the output: 01020304050607.....
Thread1: prints 0
Thread2: prints odd numbers
Thread3: prints even numbers
#include <iostream>
#include <thread>
#include <mutex>
#include <condition_variable>
std::mutex m;
std::condition_variable cv1, cv2, cv3;
int count = 0;
void printzero(int end)
{
while (count <= end)
{
std::unique_lock<std::mutex> lock(m);
cv1.wait(lock);
std::cout << 0 << " ";
++count;
if (count % 2 == 1)
{
lock.unlock();
cv2.notify_one();
}
else
{
lock.unlock();
cv3.notify_one();
}
}
}
void printodd(int end)
{
while (count <= end)
{
std::unique_lock<std::mutex> lock(m);
cv2.wait(lock);
if (count % 2 == 1)
{
std::cout << count << " ";
++count;
lock.unlock();
cv1.notify_one();
}
}
}
void printeven(int end)
{
while (count <= end)
{
std::unique_lock<std::mutex> lock(m);
cv3.wait(lock);
if (count % 2 == 0)
{
std::cout << count << " ";
++count;
lock.unlock();
cv1.notify_one();
}
}
}
int main()
{
int end = 10;
std::thread t3(printzero, end);
std::thread t1(printodd, end);
std::thread t2(printeven, end);
cv1.notify_one();
t1.join();
t2.join();
t3.join();
return 0;
}
My solution seems to be in a deadlock situation. I'm not even sure if the logic is correct. Please help
There are several issues with your code. Here is what you need to do in order to make it work:
Revise your while (count <= end) check. Reading count without synchronization is undefined behavior (UB).
Use a proper predicate with std::condition_variable::wait. Problems of your code without predicate:
If notify_one is called before wait then the notification is lost. In the worst case, main's call to notify_one is executed before the threads start running. As a result, all threads may wait indefinitely.
Spurious wakeups may disrupt your program flow. See also cppreference.com on std::condition variable.
Use std::flush (just to be sure).
I played around with your code quite a lot. Below you find a version where I applied my suggested fixes. In addition, I also experimented with some other ideas that came to my mind.
#include <cassert>
#include <condition_variable>
#include <functional>
#include <iostream>
#include <mutex>
#include <thread>
#include <vector>
// see the `std::mutex` for an example how to avoid global variables
std::condition_variable cv_zero{};
std::condition_variable cv_nonzero{};
bool done = false;
int next_digit = 1;
bool need_zero = true;
void print_zero(std::mutex& mt) {
while(true) {// do not read shared state without holding a lock
std::unique_lock<std::mutex> lk(mt);
auto pred = [&] { return done || need_zero; };
cv_zero.wait(lk, pred);
if(done) break;
std::cout << 0 << "\t"
<< -1 << "\t"// prove that it works
<< std::this_thread::get_id() << "\n"// prove that it works
<< std::flush;
need_zero = false;
lk.unlock();
cv_nonzero.notify_all();// Let the other threads decide which one
// wants to proceed. This is probably less
// efficient, but preferred for
// simplicity.
}
}
void print_nonzero(std::mutex& mt, int end, int n, int N) {
// Example for `n` and `N`: Launch `N == 2` threads with this
// function. Then the thread with `n == 1` prints all odd numbers, and
// the one with `n == 0` prints all even numbers.
assert(N >= 1 && "number of 'nonzero' threads must be positive");
assert(n >= 0 && n < N && "rank of this nonzero thread must be valid");
while(true) {// do not read shared state without holding a lock
std::unique_lock<std::mutex> lk(mt);
auto pred = [&] { return done || (!need_zero && next_digit % N == n); };
cv_nonzero.wait(lk, pred);
if(done) break;
std::cout << next_digit << "\t"
<< n << "\t"// prove that it works
<< std::this_thread::get_id() << "\n"// prove that it works
<< std::flush;
// Consider the edge case of `end == INT_MAX && next_digit == INT_MAX`.
// -> You need to check *before* incrementing in order to avoid UB.
assert(next_digit <= end);
if(next_digit == end) {
done = true;
cv_zero.notify_all();
cv_nonzero.notify_all();
break;
}
++next_digit;
need_zero = true;
lk.unlock();
cv_zero.notify_one();
}
}
int main() {
int end = 10;
int N = 2;// number of threads for `print_nonzero`
std::mutex mt{};// example how to pass by reference (avoiding globals)
std::thread t_zero(print_zero, std::ref(mt));
// Create `N` `print_nonzero` threads with `n` in [0, `N`).
std::vector<std::thread> ts_nonzero{};
for(int n=0; n<N; ++n) {
// Note that it is important to pass `n` by value.
ts_nonzero.emplace_back(print_nonzero, std::ref(mt), end, n, N);
}
t_zero.join();
for(auto&& t : ts_nonzero) {
t.join();
}
}

pthread_mutex_lock is not instante...?

people. I was trying to use mutex as a subtitute for atomic variables, but has the resut demonstrates,it seems that the mutex is not instante, because the "num" still have time to half change, making the if condition true.
Is this supose to work like that, or im i just doing :().
Comente please.. THankes:D
#include <iostream>
#include <pthread.h>
#include <mutex>
using namespace std;
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
int num = 450;
void* print1 (void* e){
while(1){
if(num != 450 && num != 201 ){
cout << "num-> "<<num<<endl;
}
pthread_mutex_lock(&mutex);
Sleep(0.5);
num = 450;
pthread_mutex_unlock(&mutex);
}
}
void* print2 (void* e){
while(1){
if(num != 450 && num != 201 ){
cout << "num-> "<<num<<endl;
}
pthread_mutex_lock(&mutex);
Sleep(0.5);
num = 201;
pthread_mutex_unlock(&mutex);
}
}
int main(){
pthread_t* threads1;
threads1 = new pthread_t;
pthread_t* threads2;
threads2 = new pthread_t;
//-------------------------------------
pthread_create(threads1,NULL,print1,(void*)NULL);
pthread_create(threads2,NULL,print2,(void*)NULL);
pthread_join(*threads1,(void**)NULL);
pthread_join(*threads2,(void**)NULL);
return false;
}
Result:
num-> 450
num-> 450
num-> 450
num-> 450
...
I was expecting:
(blanck)...
Your code does not synchronize read access to the shared num variable with write access to the same variable. This is clearly visible when compiling with -fsanitize=thread:
WARNING: ThreadSanitizer: data race (pid=3082)
Write of size 4 at 0x0000006020d0 by thread T1 (mutexes: write M9):
#0 print1(void*) test.cc:19 (test+0x000000400df0)
#1 <null> <null> (libtsan.so.0+0x000000024459)
Previous read of size 4 at 0x0000006020d0 by thread T2:
#0 print2(void*) test.cc:27 (test+0x000000400e3f)
#1 <null> <null> (libtsan.so.0+0x000000024459)
Read access needs some form of synchronization as well, not just write access. Otherwise, you may not even observe updates, or see inconsistent data. The details are a consequence of the C++ memory model.

My semaphore module is not working properly(Dining philosopher)

I'm implementing a semaphore methods to understand synchronization and thread things.
By using my semaphore, I tried to solve the Dining Philosophers problem.
My plan was making deadlock situation first.
But I found that just only one philosopher eat repeatedly.
And I checked that my semaphore is working quite good by using other synchronization problems. I think there is some problem with grammar.
please let me know what is the problem.
Here is my code.
dinig.c (including main function)
#include "sem.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
static tsem_t *chopstick[5];
static tsem_t *updating;
static int update_status (int i, int eating)
{
static int status[5] = { 0, };
static int duplicated;
int idx;
int sum;
tsem_wait (updating);
status[i] = eating;
/* Check invalid state. */
duplicated = 0;
sum = 0;
for (idx = 0; idx < 5; idx++)
{
sum += status[idx];
if (status[idx] && status[(idx + 1) % 5])
duplicated++;
}
/* Avoid printing empty table. */
if (sum == 0)
{
tsem_signal (updating);
return 0;
}
for (idx = 0; idx < 5; idx++)
fprintf (stdout, "%3s ", status[idx] ? "EAT" : "...");
/* Stop on invalid state. */
if (sum > 2 || duplicated > 0)
{
fprintf (stdout, "invalid %d (duplicated:%d)!\n", sum, duplicated);
exit (1);
}
else
fprintf (stdout, "\n");
tsem_signal (updating);
return 0;
}
void *thread_func (void *arg)
{
int i = (int) (long) arg;
int k = (i + 1) % 5;
do
{
tsem_wait (chopstick[i]);
tsem_wait (chopstick[k]);
update_status (i, 1);
update_status (i, 0);
tsem_signal (chopstick[i]);
tsem_signal (chopstick[k]);
}
while (1);
return NULL;
}
int main (int argc,
char **argv)
{
int i;
for (i = 0; i < 5; i++)
chopstick[i] = tsem_new (1);
updating = tsem_new (1);
for (i = 0; i < 5; i++)
{
pthread_t tid;
pthread_create (&tid, NULL, thread_func, (void *) (long) i);
}
/* endless thinking and eating... */
while (1)
usleep (10000000);
return 0;
}
sem.c(including semaphore methods)
#include "sem.h"
.
sem.h(Header for sem.c)
#ifndef __SEM_H__
#define __SEM_H__
#include <pthread.h>
typedef struct test_semaphore tsem_t;
tsem_t *tsem_new (int value);
void tsem_free (tsem_t *sem);
void tsem_wait (tsem_t *sem);
int tsem_try_wait (tsem_t *sem);
void tsem_signal (tsem_t *sem);
#endif /* __SEM_H__ */
compile command
gcc sem.c dining.c -pthread -o dining
One problem is that in tsem_wait() you have the following code sequence outside of a lock:
while(sem->count <= 0)
continue;
There's no guarantee that the program will actually re-read sem->count - the compiler is free to produce machine code that does something like the following:
int temp = sem->count;
while(temp <= 0)
continue;
In fact, this will likely happen in an optimized build.
Try changing your busy wait loop to something like this so the count is checked while holding the lock:
void tsem_wait (tsem_t *sem)
{
pthread_mutex_lock(&(sem->mutexLock));
while (sem->count <= 0) {
pthread_mutex_unlock(&(sem->mutexLock));
usleep(1);
pthread_mutex_lock(&(sem->mutexLock));
}
// sem->mutexLock is still held here...
sem->count--;
pthread_mutex_unlock(&(sem->mutexLock));
}
Strictly speaking, you should do something similar for tsem_try_wait() (which you're not using yet).
Note that you might want to consider using a pthread_cond_t to make waiting on the counter changing more efficient.
Finally, your code to 'get' the chopsticks in thread_func() has the classic Dining Philosopher deadlock problem in the situation where each philosopher simultaneously acquires the 'left' chopstick (chopstick[i]) and ends up waiting forever to get the 'right' chopstick (chopstick[k]) since all the chopsticks are in some philosopher's left hand.

Will not closing read end of a pipe lead to "Broken pipe" on write end

I have the following program (trimmed down version of my original code) to highlight what I am observing:
#include <iostream>
#include <unistd.h>
#include <sys/wait.h>
#include <errno.h>
#include <cassert>
#include <cstring>
using namespace std;
int main()
{
for (uint32_t i = 0; i < 5; ++i) {
int pipefd[2];
char buff[64];
FILE *cmd_output_fd;
int rval;
int pid, child_status;
rval = pipe(pipefd);
assert(rval == 0);
cout << "Pipe : " << pipefd[0] << ", " << pipefd[1] << endl;
pid = fork();
assert(pid >= 0);
if (pid == 0) {
// Child process - exec cksum
dup2(pipefd[1], STDOUT_FILENO);
close(pipefd[0]);
close(pipefd[1]);
execlp("cksum", "cksum", "testout", nullptr);
// If we are here, execlp() failed.
assert(0);
return 1;
}
close(pipefd[1]);
// wait for child to finish up.
wait(&child_status);
if ((WIFEXITED(child_status)) && (WEXITSTATUS(child_status) != 0)) {
std::cout << "cksum utility finished with error, exit status "
<< WEXITSTATUS(child_status) << endl;
}
// If all is well, cksum has checksum of testout in it's stdout (pipefd[1])
cmd_output_fd = fdopen(pipefd[0], "r");
assert(cmd_output_fd != NULL); // Error (NOTE 1)
if (fgets(buff, sizeof(buff), cmd_output_fd) == NULL) {
assert(0); // Error
}
fclose(cmd_output_fd);
}
return 0;
}
Once in a rare while, I encounter the following error:
Pipe : 22, 24
cksum: write error: Broken pipe
cksum utility finished with error, exit status 1
When the cksum utility hits the "Broken pipe" error (1), cmd_output_fd is NULL (2) (triggers assert() in line marked Note 1). My guess is during the loop when the error is encountered, pipefd[0] (the reader end) gets closed somehow, prior to cksum being launched. This would explain the two observations - cksum not able to write to pipefd[0], and fdopen() fails.
If my assumption is correct, what could be causing pipefd[0] to be closed?
Just a wild guess, could this issue be caused because I am not doing a close(pipefd[0]) at the end of the for loop. If so, how?
Thank you,
Ahmed.

Why is my timer not periodic but expired only one time?

I have created a timer using POSIX timerfd function.
Intention is, the timer should be periodic, and the timer expiry is observed from a seperate function called myFunc( )
I am calling this function multiple times, so that the timer expiry can be observed periodically after a wait of 5 secs.
Problem is, as soon as first time it expires after 5 seconds, next time onwards...it doesn't expire again, that is no delay of 5 seconds is observed from the second iteration onwards.
Can someone tell me what i am missing?
#include <stdio.h>
#include <iostream>
#include <errno.h>
#include <dlfcn.h>
#include <assert.h>
#include <sys/mman.h>
#include <new>
#include <limits.h>
#include <sys/epoll.h>
#include <sys/timerfd.h>
using namespace std;
struct epoll_event event;
int timer_fd, efd, no_of_fd;
void myFunc( int i );
int main()
{
struct itimerspec its;
its.it_value.tv_sec = 5;
its.it_value.tv_nsec = 0;
its.it_interval.tv_sec = 3; // Every 3 seconds interval
its.it_interval.tv_nsec = 0;
efd = epoll_create(2);
timer_fd = timerfd_create(CLOCK_REALTIME, TFD_NONBLOCK);
if ( timer_fd == -1 )
{
fprintf(stderr, "timerfd_create error in start timer");
return 1;
}
event.data.fd = timer_fd;
event.events = EPOLLIN|EPOLLPRI;
if ( epoll_ctl(efd, EPOLL_CTL_ADD, timer_fd, &event) == -1 )
{
fprintf(stderr, "epoll_ctl error in start timer");
return 1;
}
if ( timerfd_settime(timer_fd, 0, &its, NULL) == -1 )
{
fprintf(stderr, "timerfd_settime error in start timer");
return 1;
}
myFunc( 10 );
myFunc( 20 );
myFunc( 30 );
}
void myFunc( int i )
{
printf("Inside myFunc %d\n", i);
no_of_fd = 0;
struct epoll_event revent;
errno = 0;
do {
no_of_fd = epoll_wait(efd, &revent, 1, -1);
} while ( no_of_fd < 0 && errno == EINTR );
if ( no_of_fd < 0 )
{
fprintf(stderr, "epoll_wait error in start timer");
}
if ( revent.data.fd == timer_fd ) {
printf("Timer expired \n");
}
}
When using epoll with level-triggering, you should read 8 bytes on every EPOLLIN. This is an int64 that tells you the number of event expirations. Reading it effectively "clears" this number so that the next EPOLLIN is the result of a different event expiration.
The manual tells you about reading:
If the timer has already expired one or more times since its
settings were last modified using timerfd_settime(), or since
the last successful read(2), then the buffer given to read(2)
returns an unsigned 8-byte integer (uint64_t) containing the
number of expirations that have occurred. (The returned value
is in host byte order—that is, the native byte order for
integers on the host machine.)

Resources