How could futex_wake return 0 - linux

I implemented semaphore using futex. The following program often fails at the assertion in sem_post(). While the return value is supposed to be 1, it sometimes returns 0. How can this happen?
When I use POSIX semaphore the program always finishes successfully.
I'm using Linux 2.6.32-642.6.1.el6.x86_64
#include <cstdio>
#include <cstdlib>
#include <cassert>
#include <ctime>
#include <linux/futex.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>
#include <errno.h>
using namespace std;
#if 0
#include <semaphore.h>
#else
/* Stand-in for the POSIX type: the whole semaphore is one int counter. */
typedef volatile int sem_t;

/*
 * Store `value` as the semaphore's initial counter.
 * `shared` mirrors the POSIX sem_init() signature and is not used here.
 */
void sem_init(sem_t* sem, int shared, int value)
{
    (void)shared;
    *sem = value;
}
// Release one unit of the semaphore.
// Retries until a compare-and-swap on *sem succeeds: a non-negative value is
// incremented, while a negative value (stored by sem_wait to flag a waiter)
// is replaced by 1. When the value seen was negative, one FUTEX_WAKE is
// issued and the result is asserted to be exactly 1.
void sem_post(sem_t* sem)
{
while (1)
{
int value = *sem;
// new value: value+1 when no waiter is flagged, otherwise 1
if (__sync_bool_compare_and_swap(sem, value, value >= 0 ? value+1 : 1))
{
if (value < 0) // had contender
{
int r = syscall(SYS_futex, sem, FUTEX_WAKE, 1, NULL, 0, 0);
if (r != 1)
fprintf(stderr, "post r=%d err=%d sem=%d %d\n", r,errno,value,*sem);
// NOTE(review): FUTEX_WAKE reports how many waiters were woken; that is 0
// if the thread that stored -1 has not yet entered FUTEX_WAIT.
assert(r == 1);
}
return;
}
}
}
// Acquire one unit of the semaphore; the only return value is 0.
// A positive counter is decremented with compare-and-swap. Otherwise the
// counter is swapped to -1 to flag a waiter and the caller blocks in
// FUTEX_WAIT; a failed wait is logged and the whole sequence is retried.
int sem_wait(sem_t* sem)
{
while (1)
{
int value = *sem;
if (value > 0 // positive means no contender
&& __sync_bool_compare_and_swap(sem, value, value-1))
return 0;
if (value <= 0
&& __sync_bool_compare_and_swap(sem, value, -1))
{
// -1 is the expected futex value passed to FUTEX_WAIT
int r= syscall(SYS_futex, sem, FUTEX_WAIT, -1, NULL, 0, 0);
if (!r) {
// NOTE(review): the decrement is inside assert(), so it disappears when
// the code is built with NDEBUG.
assert(__sync_fetch_and_sub(sem, 1) > 0);
return 0;
}
printf("wait r=%d errno=%d sem=%d %d\n", r,errno, value,*sem);
}
}
}
/* Copy the semaphore's current counter into *value with a single read. */
void sem_getvalue(sem_t* sem, int* value)
{
    int snapshot = *sem;
    *value = snapshot;
}
#endif
// CLOCK_REALTIME reading converted to nanoseconds.
unsigned long GetTime()
{
    struct timespec now;
    clock_gettime(CLOCK_REALTIME, &now);
    unsigned long ns = now.tv_sec * 1000000000ul;  // whole seconds -> ns
    ns += now.tv_nsec;                             // plus the sub-second part
    return ns;
}
// Post the semaphore `count` times in a row.
void Send(sem_t* sem, unsigned count)
{
    for (unsigned left = count; left != 0; --left)
        sem_post(sem);
}
// Wait on the semaphore `count` times in a row.
void Receive(sem_t* sem, unsigned count)
{
    for (unsigned left = count; left != 0; --left)
        sem_wait(sem);
}
int main()
{
sem_t* sem = reinterpret_cast<sem_t*>(mmap(NULL, sizeof(sem_t), PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0));
assert(sem != MAP_FAILED);
sem_init(sem, 1, 0);
unsigned count = 10485760;
int pid = fork();
assert(pid != -1);
if (!pid) // child
{
Send(sem, count);
_exit(EXIT_SUCCESS);
}
else // parent
{
unsigned long t0 = GetTime();
Receive(sem, count);
printf("t=%g ms\n", (GetTime()-t0)*1e-6);
wait(NULL);
int v;
sem_getvalue(sem, &v);
assert(v == 0);
}
}

The call to syscall(SYS_futex, sem, FUTEX_WAKE, 1, NULL, 0, 0) will return 0 when there is no thread waiting on sem. In your code this is possible because you call that futex line in sem_post when *sem is negative, which can be the case even though no thread is actually sleeping yet:
If *sem is zero when calling sem_wait you continue to execute __sync_bool_compare_and_swap(sem, value, -1) which sets *sem to -1. At that point this thread is not yet sleeping however. So, when another thread calls sem_post at that point (before the thread that is calling sem_wait enters the futex syscall) your assert failure will happen.

It seems that __sync_bool_compare_and_swap(sem, value, -1) and __sync_fetch_and_sub(sem, 1) are problematic. We need to keep in mind that sem_wait may be called concurrently by multiple threads (although in your test case there is only one thread calling it).
If we can afford the overhead of busy polling, we can remove the futex entirely, which results in the following code. It is also faster than the futex version (t=347 ms, while the futex version is t=914 ms).
// Release one unit: atomically increment the counter.
// No wake-up call is made; waiters in this variant poll the counter themselves.
void sem_post(sem_t* sem)
{
    // The previous value is not needed, so the result is discarded.
    __sync_fetch_and_add(sem, 1);
}
// Acquire one unit by polling; the only return value is 0.
// Each round reads the counter and, if it is positive, tries to claim a unit
// with compare-and-swap. On a zero counter or a lost race the processor is
// yielded before the next attempt.
int sem_wait(sem_t* sem)
{
    for (;;)
    {
        int seen = *sem;
        if (seen > 0 && __sync_bool_compare_and_swap(sem, seen, seen-1))
            return 0; // claimed one unit
        // let other threads run before polling again
        sched_yield();
    }
}
The code works as follows: The shared variable *sem is always non-negative. When a thread posts the semaphore from 0 to 1, all threads waiting on the semaphore may try, but exactly one thread will succeed in compare_and_swap.

Related

robust_list not calling FUTEX_WAKE

The Linux robust_list mechanism is a tool used by robust mutexes to support automatic unlocking in the event that the lock owner fails to unlock before terminating, maybe due to unexpected death. According to man set_robust_list:
The purpose of the robust futex list is to ensure that if a thread accidentally fails to unlock a futex before terminating or calling execve(2), another thread that is waiting on that futex is notified that the former owner of the futex has died. This notification consists of two pieces: the FUTEX_OWNER_DIED bit is set in the futex word, and the kernel performs a futex(2) FUTEX_WAKE operation on one of the threads waiting on the futex.
This is not the behavior I'm seeing.
I'm seeing the futex replaced with FUTEX_OWNER_DIED, not ored with.
And I'm not getting the FUTEX_WAKE call.
#include <chrono>
#include <thread>
#include <linux/futex.h>
#include <stdint.h>
#include <stdio.h>
#include <syscall.h>
#include <unistd.h>
using ftx_t = uint32_t;
// Mutex record that is linked into the per-thread robust list.
struct mtx_t {
// forward link; the code casts mtx_t* to robust_list* when storing it in
// the robust list head, so this is kept as the first member
mtx_t* next;
// back link, maintained by robust_op_add
mtx_t* prev;
// futex word; robust_init passes its offset to the kernel as futex_offset
ftx_t ftx;
};
// Per-thread robust-list head; robust_init() registers it with the kernel.
thread_local robust_list_head robust_head;

// Set up the calling thread's robust list as empty and register it through
// the set_robust_list system call.
void robust_init() {
    robust_list_head& head = robust_head;
    head.list_op_pending = nullptr;
    head.futex_offset = offsetof(mtx_t, ftx);
    head.list.next = &head.list;  // empty list: the head links to itself
    syscall(SYS_set_robust_list, &head.list, sizeof(head));
}
// Record `mtx` as the pending list operation, then issue a full memory
// barrier so the store is ordered before what the caller does next.
void robust_op_start(mtx_t* mtx) {
    robust_head.list_op_pending = reinterpret_cast<robust_list*>(mtx);
    __sync_synchronize();
}
// Issue a full memory barrier, then clear the pending list operation.
void robust_op_end() {
    __sync_synchronize();
    robust_head.list_op_pending = nullptr;
}
// Push `mtx` onto the front of this thread's robust list.
// The new node's links are written first; after a full barrier it is
// published as the head's first entry, and finally the previous first
// node (if there was one) gets its back link updated.
void robust_op_add(mtx_t* mtx) {
    mtx_t* const sentinel = reinterpret_cast<mtx_t*>(&robust_head);
    mtx_t* const former_first = reinterpret_cast<mtx_t*>(robust_head.list.next);

    mtx->next = former_first;
    mtx->prev = sentinel;
    __sync_synchronize();
    robust_head.list.next = reinterpret_cast<robust_list*>(mtx);

    // an empty list has the head linked to itself; nothing to patch then
    if (former_first != sentinel)
        former_first->prev = mtx;
}
// Thin wrapper that forwards all six arguments to the raw futex system call
// and returns its result narrowed to int.
int futex(ftx_t* uaddr, int futex_op, int val, uintptr_t timeout_or_val2, ftx_t* uaddr2, int val3) {
    long rc = syscall(SYS_futex, uaddr, futex_op, val, timeout_or_val2, uaddr2, val3);
    return static_cast<int>(rc);
}
// FUTEX_WAIT on `ftx` with `confirm_val` as the expected value and a zero
// (null) timeout argument; returns whatever futex() returns.
int ftx_wait(ftx_t* ftx, int confirm_val) {
    const uintptr_t no_timeout = 0;
    return futex(ftx, FUTEX_WAIT, confirm_val, no_timeout, NULL, 0);
}
// Reproduction driver: t0 blocks on the futex word, then t1 takes the
// "mutex", links it into its robust list and exits without releasing it.
// The final value of the futex word is printed before joining t0.
int main() {
mtx_t mtx = {0};
// t0: waits on the futex word, expecting it to still hold 0
std::thread t0{[&]() {
fprintf(stderr, "t0 start\n");
ftx_wait(&mtx.ftx, 0);
fprintf(stderr, "t0 done\n");
}};
// presumably gives t0 time to reach FUTEX_WAIT before t1 changes the word
std::this_thread::sleep_for(std::chrono::milliseconds(100));
// t1: registers a robust list, stores its thread id in the futex word,
// adds the mutex to the list, and returns while still "holding" it
std::thread t1{[&]() {
fprintf(stderr, "t1 start\n");
robust_init();
robust_op_start(&mtx);
__sync_bool_compare_and_swap(&mtx.ftx, 0, syscall(SYS_gettid));
robust_op_add(&mtx);
robust_op_end();
fprintf(stderr, "t1 ftx: %x\n", mtx.ftx);
fprintf(stderr, "t1 done\n");
}};
t1.join();
std::this_thread::sleep_for(std::chrono::milliseconds(100));
// value of the futex word after t1 has terminated
fprintf(stderr, "ftx: %x\n", mtx.ftx);
t0.join();
}
Running
g++ -o ./example ~/example.cpp -lpthread && ./example
prints something like:
t0 start
t1 start
t1 ftx: 12ea65
t1 done
ftx: 40000000
and hangs.
I would expect the final value of the futex to be 4012ea65 and for thread 0 to unblock after thread 1 completes.

Calling sem_post before sem_wait in multithreaded environment

The behavior of the sem_post() function is not clear for a binary semaphore based implementation.
What happens when you call sem_wait() after calling sem_post()?
Will it work?
Code example :
Thread 1 :
do_something_critical()
sem_post();
Thread 2 :
sem_wait()
Proceed()
Here, if sem_post() somehow gets called before the call to sem_wait(),
will it work? Or is it necessary that sem_wait() be called before sem_post()?
sem_post() merely increments the semaphore and wakes up any waiting thread if any. Otherwise it does nothing.
sem_wait() merely decrements the semaphore. The caller will be blocked only if the current value of the semaphore is 0.
Here is an example program where the main thread initializes a semaphore to 0 and calls sem_trywait() to verify that the semaphore is busy (i.e. value is 0). Then, it calls sem_post() to release the semaphore (i.e. value is 1) before creating a thread. The thread calls sem_wait() (this decrements the semaphore to 0) and returns. The main thread waits for the end of the thread and verifies that the semaphore is 0 with a call to sem_trywait():
#include <pthread.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <semaphore.h>
#include <stdio.h>
#include <errno.h>
static sem_t *sem;
/*
 * Thread body: announce start, take the semaphore once (decrementing it),
 * announce exit. A failing sem_wait() is reported with perror() and ends
 * the thread early. Always returns NULL; `p` is not used.
 */
void *thd_entry(void *p)
{
  printf("Thread is starting...\n");

  if (sem_wait(sem) != 0) {
    perror("sem_wait()");
    return NULL;
  }

  printf("Thread is exiting...\n");
  return NULL;
}
/*
 * Non-blocking probe of the semaphore.
 * Prints the "busy" message when sem_trywait() fails with EAGAIN.
 * Returns 1 only when sem_trywait() fails for another reason (after
 * reporting it with perror()); returns 0 in every other case, including a
 * successful decrement.
 */
static int probe_semaphore(sem_t *s)
{
  if (sem_trywait(s) != -1)
    return 0;

  if (errno == EAGAIN) {
    printf("Semaphore is busy (i.e. value is 0)\n");
    return 0;
  }

  perror("sem_trywait()");
  return 1;
}

/*
 * Demonstrates that sem_post() may be called before sem_wait():
 * open the named semaphore (initial value 0 when newly created), probe it,
 * post it once, run a thread that waits on it, then probe it again.
 * Returns 0 on success and 1 on the first failing call.
 */
int main(int ac, char *av[])
{
  pthread_t thd;
  int err;

  /* Open (creating if necessary) the named semaphore */
  sem = sem_open("/example", O_CREAT|O_RDWR, 0777, 0);
  if (sem == SEM_FAILED) {
    perror("sem_open()");
    return 1;
  }

  /* First probe, before anything has been posted */
  if (probe_semaphore(sem))
    return 1;

  /* Increment the semaphore ahead of any waiter */
  if (sem_post(sem) != 0) {
    perror("sem_post()");
    return 1;
  }

  /* The thread consumes the unit posted above */
  err = pthread_create(&thd, NULL, thd_entry, 0);
  if (err != 0) {
    errno = err;
    perror("pthread_create()");
    return 1;
  }

  err = pthread_join(thd, NULL);
  if (err != 0) {
    errno = err;
    perror("pthread_join()");
    return 1;
  }

  /* Second probe, after the thread's sem_wait() */
  return probe_semaphore(sem);
}
Here is a try:
$ ls -l /dev/shm
total 0
$ gcc sema.c -o sema -lpthread
$ ./sema
Semaphore is busy (i.e. value is 0)
Thread is starting...
Thread is exiting...
Semaphore is busy (i.e. value is 0)
$ ls -l /dev/shm
total 4
-rwxrwxr-x 1 xxxxx xxxxx 32 janv. 5 16:24 sem.example
$ rm /dev/shm/sem.example

read/writes to shared variable b/w pthread not synchronized

I am trying to implement a simple producer/consumer code using pthreads. The only common shared data between producer and consumer thread is the count variable used for counting the number of available elements in the shared array. What is happening is the count updated in one thread is not getting reflected in other. How can I make sure the writes to count in one thread appear in other as well? Am I missing something?
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#define ARRAY_SIZE 100
int array[ARRAY_SIZE];
volatile int count;
int head;
int tail;
pthread_cond_t full = PTHREAD_COND_INITIALIZER;
pthread_cond_t empty = PTHREAD_COND_INITIALIZER;
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
/*
 * Producer thread (version from the question): under `mutex`, stores the next
 * index into the ring buffer, bumps `count`, then signals `full`.
 * When the buffer is full it waits once on `empty` and exits the process
 * if `count` is still ARRAY_SIZE afterwards.
 */
void *producer(void *args)
{
int res = 0;
while (1) {
pthread_mutex_lock(&mutex);
if (count == ARRAY_SIZE) {
printf("\nNo space for new items waiting for consumer to consume");
pthread_cond_wait(&empty, &mutex);
// Sometimes, why is count variable still ARRAY_SIZE.
// How do I make sure writes to 'count' variable in
// consumer thread is visible immediately in producer
// thread?
if (count == ARRAY_SIZE) {
printf("\ncount is still ARRAY_SIZE");
exit(0);
}
}
/* wrap the write index, then publish one item */
head %= ARRAY_SIZE;
count++;
array[head] = head;
printf("\nproduced %d/%d", head, count);
head++;
pthread_mutex_unlock(&mutex);
pthread_cond_signal(&full);
}
}
/*
 * Consumer thread (version from the question): under `mutex`, reads the next
 * slot of the ring buffer, decrements `count`, then signals `empty`.
 * When the buffer is empty it waits once on `full` and exits the process
 * if `count` is still zero afterwards.
 */
void *consumer(void *args)
{
int res = 0;
while (1) {
pthread_mutex_lock(&mutex);
if (count == 0) {
printf("\nNo items available waiting for producer to produce");
pthread_cond_wait(&full, &mutex);
// Sometimes, why is count variable still zero. How do I
// make sure writes to 'count' variable in producer
// thread is visible immediately in consumer thread?
if (count == 0) {
printf("\ncount is still zero");
exit(0);
}
}
/* wrap the read index, then take one item */
tail %= ARRAY_SIZE;
int ele = array[tail];
count--;
printf("\nconsumed %d/%d", tail, count);
tail++;
pthread_mutex_unlock(&mutex);
pthread_cond_signal(&empty);
}
}
/*
 * Start the producer and consumer threads and wait for both.
 * stdout is made unbuffered first. Returns 0, or the pthread_create()
 * error code if a thread could not be started.
 */
int main()
{
    pthread_t producer_thread;
    pthread_t consumer_thread;
    int ret;

    setbuf(stdout, NULL);

    ret = pthread_create(&producer_thread, NULL, producer, NULL);
    if (ret != 0) {
        printf("\nUnable to create producer thread %d", ret);
        return ret;
    }

    ret = pthread_create(&consumer_thread, NULL, consumer, NULL);
    if (ret != 0) {
        printf("\nUnable to create consumer thread %d", ret);
        return ret;
    }

    pthread_join(producer_thread, NULL);
    pthread_join(consumer_thread, NULL);
    return ret;
}
produced 72/99
produced 73/100
No space for new items waiting for consumer to consume
consumed 74/99
consumed 75/98
consumed 76/97
consumed 77/96
produced 74/97
produced 75/98
produced 76/99
produced 77/100
No space for new items waiting for consumer to consume
count is still ARRAY_SIZE <------ incorrect
consumed 21/2
consumed 22/1
consumed 23/0
No items available waiting for producer to produce
produced 24/1
consumed 24/0
No items available waiting for producer to produce
produced 25/1
produced 26/2
produced 27/3
consumed 25/2
consumed 26/1
consumed 27/0
No items available waiting for producer to produce
count is still zero <------ incorrect
Solution that worked after the fix from Zan Lynx
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#define ARRAY_SIZE 100
int array[ARRAY_SIZE];
volatile int count;
int head;
int tail;
pthread_cond_t full = PTHREAD_COND_INITIALIZER;
pthread_cond_t empty = PTHREAD_COND_INITIALIZER;
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
/*
 * Producer thread: under `mutex`, stores the next index into the ring
 * buffer, bumps `count`, then signals `full`.
 * When the buffer is full it waits on `empty`, re-testing the predicate
 * after every wakeup. Never returns in practice; `args` is unused.
 */
void *producer(void *args)
{
    (void)args;
    while (1) {
        pthread_mutex_lock(&mutex);
        if (count == ARRAY_SIZE) {
            printf("\nNo space for new items waiting for consumer to consume");
            /*
             * pthread_cond_wait() may return spuriously, and its return
             * says nothing about the predicate, so loop until the buffer
             * really has room.
             */
            while (count == ARRAY_SIZE)
                pthread_cond_wait(&empty, &mutex);
        }
        /* wrap the write index, then publish one item */
        head %= ARRAY_SIZE;
        count++;
        array[head] = head;
        printf("\nproduced %d/%d", head, count);
        head++;
        pthread_mutex_unlock(&mutex);
        pthread_cond_signal(&full);
    }
    return NULL;
}
/*
 * Consumer thread: under `mutex`, releases the next slot of the ring
 * buffer, decrements `count`, then signals `empty`.
 * When the buffer is empty it waits on `full`, re-testing the predicate
 * after every wakeup. Never returns in practice; `args` is unused.
 */
void *consumer(void *args)
{
    (void)args;
    while (1) {
        pthread_mutex_lock(&mutex);
        if (count == 0) {
            printf("\nNo items available waiting for producer to produce");
            /*
             * pthread_cond_wait() may return spuriously, and its return
             * says nothing about the predicate, so loop until an item is
             * really available.
             */
            while (count == 0)
                pthread_cond_wait(&full, &mutex);
        }
        /*
         * Wrap the read index and release one slot. The stored value is
         * not needed; only the slot index is reported.
         */
        tail %= ARRAY_SIZE;
        count--;
        printf("\nconsumed %d/%d", tail, count);
        tail++;
        pthread_mutex_unlock(&mutex);
        pthread_cond_signal(&empty);
    }
    return NULL;
}
/*
 * Start the producer and consumer threads and wait for both.
 * stdout is made unbuffered first. Returns 0, or the pthread_create()
 * error code if a thread could not be started.
 */
int main()
{
    pthread_t producer_thread;
    pthread_t consumer_thread;
    int ret;

    setbuf(stdout, NULL);

    ret = pthread_create(&producer_thread, NULL, producer, NULL);
    if (ret != 0) {
        printf("\nUnable to create producer thread %d", ret);
        return ret;
    }

    ret = pthread_create(&consumer_thread, NULL, consumer, NULL);
    if (ret != 0) {
        printf("\nUnable to create consumer thread %d", ret);
        return ret;
    }

    pthread_join(producer_thread, NULL);
    pthread_join(consumer_thread, NULL);
    return ret;
}
I believe you missed the fact that condition waits must always check the predicate again after the wait returns. There must be a loop.
The wait may end for all sorts of reasons besides a signal/notify call.

My semaphore module is not working properly(Dining philosopher)

I'm implementing semaphore methods to understand synchronization and threads.
By using my semaphore, I tried to solve the Dining Philosophers problem.
My plan was making deadlock situation first.
But I found that only one philosopher eats, repeatedly.
And I checked that my semaphore is working quite good by using other synchronization problems. I think there is some problem with grammar.
please let me know what is the problem.
Here is my code.
dining.c (including main function)
#include "sem.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
static tsem_t *chopstick[5];
static tsem_t *updating;
/*
 * Record philosopher `i`'s state (`eating` non-zero means eating) and print
 * the table, all while holding the `updating` semaphore.
 * Nothing is printed when nobody is eating. If more than two philosophers
 * eat at once, or two neighbours eat together, the state is reported as
 * invalid and the process exits with status 1. Otherwise returns 0.
 */
static int update_status (int i, int eating)
{
  static int status[5] = { 0, };
  int sum = 0;
  int neighbours = 0;
  int p;

  tsem_wait (updating);
  status[i] = eating;

  /* Tally eaters and count adjacent pairs that are both eating. */
  for (p = 0; p < 5; p++)
    {
      sum += status[p];
      if (status[p] && status[(p + 1) % 5])
        neighbours++;
    }

  /* Only print a table that has at least one eater. */
  if (sum != 0)
    {
      for (p = 0; p < 5; p++)
        fprintf (stdout, "%3s ", status[p] ? "EAT" : "...");

      /* Stop on invalid state. */
      if (sum > 2 || neighbours > 0)
        {
          fprintf (stdout, "invalid %d (duplicated:%d)!\n", sum, neighbours);
          exit (1);
        }
      fprintf (stdout, "\n");
    }

  tsem_signal (updating);
  return 0;
}
/*
 * Philosopher thread. `arg` carries the philosopher index. Forever: take
 * chopstick `i`, then chopstick `(i + 1) % 5`, mark the philosopher as
 * eating and then as not eating, and put both chopsticks back.
 */
void *thread_func (void *arg)
{
  const int left = (int) (long) arg;
  const int right = (left + 1) % 5;

  for (;;)
    {
      tsem_wait (chopstick[left]);
      tsem_wait (chopstick[right]);
      update_status (left, 1);
      update_status (left, 0);
      tsem_signal (chopstick[left]);
      tsem_signal (chopstick[right]);
    }
  return NULL;
}
/*
 * Create the five chopstick semaphores and the `updating` semaphore (all
 * with initial value 1), start one philosopher thread per seat, then idle
 * forever while the threads run. Exits with status 1 if a thread cannot
 * be created. Command-line arguments are not used.
 */
int main (int argc,
          char **argv)
{
  int i;

  (void) argc;
  (void) argv;

  for (i = 0; i < 5; i++)
    chopstick[i] = tsem_new (1);
  updating = tsem_new (1);

  for (i = 0; i < 5; i++)
    {
      pthread_t tid;
      int rc = pthread_create (&tid, NULL, thread_func, (void *) (long) i);

      if (rc != 0)
        {
          fprintf (stderr, "pthread_create failed for philosopher %d (error %d)\n", i, rc);
          exit (1);
        }
    }

  /* endless thinking and eating... */
  /* sleep() rather than usleep(10000000): POSIX permits usleep() to fail
     with EINVAL for arguments of 1,000,000 or more, which would turn this
     loop into a busy spin. */
  while (1)
    sleep (10);
  return 0;
}
sem.c(including semaphore methods)
#include "sem.h"
.
sem.h(Header for sem.c)
#ifndef __SEM_H__
#define __SEM_H__
#include <pthread.h>
typedef struct test_semaphore tsem_t;
tsem_t *tsem_new (int value);
void tsem_free (tsem_t *sem);
void tsem_wait (tsem_t *sem);
int tsem_try_wait (tsem_t *sem);
void tsem_signal (tsem_t *sem);
#endif /* __SEM_H__ */
compile command
gcc sem.c dining.c -pthread -o dining
One problem is that in tsem_wait() you have the following code sequence outside of a lock:
while(sem->count <= 0)
continue;
There's no guarantee that the program will actually re-read sem->count - the compiler is free to produce machine code that does something like the following:
int temp = sem->count;
while(temp <= 0)
continue;
In fact, this will likely happen in an optimized build.
Try changing your busy wait loop to something like this so the count is checked while holding the lock:
/*
 * Take one unit from the semaphore, blocking until one is available.
 * The count is only ever examined with the semaphore's mutex held; while it
 * is not positive the mutex is dropped for a short usleep() and re-taken.
 */
void tsem_wait (tsem_t *sem)
{
  pthread_mutex_lock (&sem->mutexLock);
  for (;;)
    {
      if (sem->count > 0)
        break;
      /* nothing available: let other threads in, then look again */
      pthread_mutex_unlock (&sem->mutexLock);
      usleep (1);
      pthread_mutex_lock (&sem->mutexLock);
    }
  /* the mutex is held here, so the decrement cannot race */
  sem->count--;
  pthread_mutex_unlock (&sem->mutexLock);
}
Strictly speaking, you should do something similar for tsem_try_wait() (which you're not using yet).
Note that you might want to consider using a pthread_cond_t to make waiting on the counter changing more efficient.
Finally, your code to 'get' the chopsticks in thread_func() has the classic Dining Philosopher deadlock problem in the situation where each philosopher simultaneously acquires the 'left' chopstick (chopstick[i]) and ends up waiting forever to get the 'right' chopstick (chopstick[k]) since all the chopsticks are in some philosopher's left hand.

Differences between POSIX threads on OSX and LINUX?

Can anyone shed light on the reason that when the below code is compiled and run on OSX the 'bartender' thread skips through the sem_wait() in what seems like a random manner and yet when compiled and run on a Linux machine the sem_wait() holds the thread until the relative call to sem_post() is made, as would be expected?
I am currently learning not only POSIX threads but concurrency as a whole, so absolutely any comments, tips and insights are warmly welcomed...
Thanks in advance.
#include <stdio.h>
#include <stdlib.h>
#include <semaphore.h>
#include <fcntl.h>
#include <unistd.h>
#include <pthread.h>
#include <errno.h>
//using namespace std;
#define NSTUDENTS 30
#define MAX_SERVINGS 100
void* student(void* ptr);
void get_serving(int id);
void drink_and_think();
void* bartender(void* ptr);
void refill_barrel();
// This shared variable gives the number of servings currently in the barrel
int servings = 10;
// Define here your semaphores and any other shared data
sem_t *mutex_stu;
sem_t *mutex_bar;
/*
 * Open the two named semaphores (student semaphore with initial value 0,
 * bartender semaphore with initial value 1), start the bartender thread
 * and NSTUDENTS student threads, then join the last thread created.
 * The semaphores are unlinked only after that join returns.
 */
int main() {
    static const char *semname1 = "Semaphore1";
    static const char *semname2 = "Semaphore2";
    pthread_t tid;
    int spawned;

    mutex_stu = sem_open(semname1, O_CREAT, 0777, 0);
    if (SEM_FAILED == mutex_stu) {
        fprintf(stderr, "%s\n", "ERROR creating semaphore semname1");
        exit(EXIT_FAILURE);
    }

    mutex_bar = sem_open(semname2, O_CREAT, 0777, 1);
    if (SEM_FAILED == mutex_bar) {
        fprintf(stderr, "%s\n", "ERROR creating semaphore semname2");
        exit(EXIT_FAILURE);
    }

    /* every thread is handed the address of the same `tid` variable */
    pthread_create(&tid, NULL, bartender, &tid);
    for (spawned = 0; spawned != NSTUDENTS; spawned++)
        pthread_create(&tid, NULL, student, &tid);

    /* `tid` now identifies the last student thread */
    pthread_join(tid, NULL);

    sem_unlink(semname1);
    sem_unlink(semname2);
    printf("Exiting the program...\n");
}
//Called by a student process. Do not modify this.
// Pauses the calling thread for a random duration.
void drink_and_think() {
// Sleep time in seconds (0-9): sleep() takes whole seconds, not milliseconds
int st = rand() % 10;
sleep(st);
}
// Called by a student process. Do not modify this.
// Takes one serving from the shared `servings` counter, never letting it go
// below zero, and prints the caller's id together with what is left.
// The function itself performs no locking.
void get_serving(int id) {
if (servings > 0) {
servings -= 1;
} else {
// barrel already empty: clamp at zero
servings = 0;
}
printf("ID %d got a serving. %d left\n", id, servings);
}
// Called by the bartender process.
// Sets the shared `servings` counter to a random amount between 1 and 10
// and prints the new level.
void refill_barrel()
{
    int amount = rand() % 10 + 1;
    servings = amount;
    printf("Barrel refilled up to -> %d\n", servings);
}
//-- Synchronized student thread.
// Reads its id through `ptr`, then loops forever: wait on the student
// semaphore; if the barrel is empty, post the bartender semaphore and start
// over; otherwise take a serving, post the student semaphore and go off to
// drink and think.
void* student(void* ptr) {
    int id = *(int*)ptr;
    printf("Started student %d\n", id);

    for (;;) {
        sem_wait(mutex_stu);

        if (servings <= 0) {
            // nothing left: hand over to the bartender and retry
            sem_post(mutex_bar);
            continue;
        }

        get_serving(id);
        sem_post(mutex_stu);
        drink_and_think();
    }
    return NULL;
}
//-- Synchronized bartender thread.
// Reads its id through `ptr`, then loops forever: wait on the bartender
// semaphore; refill the barrel if it is empty, otherwise report that the
// wait returned while servings were still available; finally post the
// student semaphore.
void* bartender(void* ptr) {
    int id = *(int*)ptr;
    printf("Started bartender %d\n", id);
    //sleep(5);

    for (;;) {
        sem_wait(mutex_bar);

        if (servings > 0) {
            printf("Bar skipped sem_wait()!\n");
        } else {
            refill_barrel();
        }

        sem_post(mutex_stu);
    }
    return NULL;
}
The first time you run the program, you're creating named semaphores with initial values, but since your threads never exit (they're infinite loops), you never get to the sem_unlink calls to delete those semaphores. If you kill the program (with ctrl-C or any other way), the semaphores will still exist in whatever state they are in. So if you run the program again, the sem_open calls will succeed (because you don't use O_EXCL), but they won't reset the semaphore value or state, so they might be in some odd state.
So you should make sure to call sem_unlink when the program STARTS, before calling sem_open. Better yet, don't use named semaphores at all -- use sem_init to initialize a couple of unnamed semaphores instead.

Resources