I am using a robust mutex together with a condition. This works most of the time, but infrequently, I get deadlocks.
I could not reduce this to a small, reproducible example, and I consider it very likely that it is a problem in my code, however, I noticed something that looks suspicious:
When the code deadlocks, one thread is in pthread_cond_broadcast:
#0 __lll_lock_wait () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:135
#1 0x00007f4ab2892970 in pthread_cond_broadcast@@GLIBC_2.3.2 () at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S:133
Another thread is in pthread_mutex_lock, on the mutex which is used with the condition:
#0 __lll_robust_lock_wait () at ../sysdeps/unix/sysv/linux/x86_64/lowlevelrobustlock.S:85
#1 0x00007f4ab288e7d7 in __pthread_mutex_lock_full (mutex=0x7f4a9858b128) at ../nptl/pthread_mutex_lock.c:256
As you can see, pthread_mutex_lock uses lowlevelrobustlock, while pthread_cond_broadcast uses lowlevellock. Is it possible that the condition somehow uses a non-robust mutex internally?
I use the mutex to protect shared memory, and it is possible that one of the processes sharing it gets killed.
So, maybe my deadlocks happen because the process was inside pthread_cond_broadcast when it was killed, and now, the other process can not broadcast, because the killed process still owns the mutex? After all, a similar situation was why I started using a robust mutex in the first place.
PS: Situations where the process gets killed in the critical section are handled, the robust mutex works great. For all the deadlocks, I saw this situation where pthread_cond_broadcast was the active function.
PPS: for the mutex, there is pthread_mutexattr_setrobust, but I could not find something like pthread_condattr_setrobust. Does it exist?
EDIT:
This 'bug' has been reported here. It is simply undefined behavior of the condition variable in this particular use case. There are no robust condition variables, so they cannot be used in IPC with shared memory. Thread cancellation can leave the condition variable in an inconsistent state.
The previous answer is below:
I have the same problem. Here is example code that causes deadlock in pthread_cond_broadcast:
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#define TRUE 1
#define FALSE 0
/* Synchronisation header placed at offset 0 of each shared-memory object. */
typedef struct {
/* Condition variable that main() broadcasts on and waits on. */
pthread_cond_t cond;
/* Mutex used together with cond; locked through robust_mutex_lock(). */
pthread_mutex_t mtx;
/* Wait-loop flag: main() sets it to 1 before broadcasting and back to 0 after waking. */
int predicate;
} channel_hdr_t;
/* Per-process handle to one channel, filled in by init_channel(). */
typedef struct {
/* Descriptor returned by shm_open() for the channel's shared-memory object. */
int fd;
/* Address at which the shared channel_hdr_t was mapped with mmap(). */
channel_hdr_t *hdr;
} channel_t;
/* Write the command-line synopsis to standard output. */
void printUsage() {
    fputs("usage: shm_comm_test2 channel_name1 channel_name2\n", stdout);
}
/*
 * Lock a robust mutex, recovering it if its previous owner died.
 *
 * Returns 0 when the caller owns the mutex afterwards (either a normal
 * lock, or an EOWNERDEAD lock that was successfully marked consistent).
 * Returns a negative code when the mutex is NOT owned by the caller:
 *   -12  pthread_mutex_lock() reported EINVAL
 *   -13  pthread_mutex_lock() reported EAGAIN
 *   -14  pthread_mutex_lock() reported EDEADLK
 *   -15  the owner died and the mutex could not be made consistent
 *   -18  any other error
 */
int robust_mutex_lock(pthread_mutex_t *mutex) {
    int lock_status = pthread_mutex_lock(mutex);

    switch (lock_status) {
    case 0:
        return 0;
    case EINVAL:
        printf("**** EINVAL ****\n");
        return -12;
    case EAGAIN:
        printf("**** EAGAIN ****\n");
        return -13;
    case EDEADLK:
        printf("**** EDEADLK ****\n");
        return -14;
    case EOWNERDEAD:
        /* The process that held the mutex is dead; we now hold it in an
         * inconsistent state and must repair it before using it. */
        printf("**** EOWNERDEAD ****\n");
        if (pthread_mutex_consistent(mutex) != 0) {
            printf("**** EOWNERDEAD, EINVAL ****\n");
            /* We still own the mutex here. Release it so the error return
             * does not leave it locked behind the caller's back; unlocking
             * an inconsistent robust mutex makes later lockers fail with
             * ENOTRECOVERABLE instead of blocking forever. */
            pthread_mutex_unlock(mutex);
            return -15;
        }
        return 0;
    default:
        /* Includes ENOTRECOVERABLE and anything else unexpected. */
        printf("**** OTHER ****\n");
        return -18;
    }
}
/*
 * Open (creating it if necessary) the shared-memory channel `shm_name`,
 * map its header and store the descriptor and mapping in `out`.
 *
 * The process that creates the object sizes it and initialises the
 * process-shared robust mutex and the process-shared condition variable.
 * A process that finds the object already present only maps it.
 *
 * Returns 0 on success, otherwise:
 *   1  the object exists but could not be opened
 *   2  the object could not be created
 *   3  ftruncate() failed
 *   4  mmap() failed
 *   5  the mutex or condition variable could not be initialised
 */
int init_channel(char *shm_name, channel_t *out) {
    const mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
    int created = 0;

    int shm_fd = shm_open(shm_name, O_RDWR | O_CREAT | O_EXCL, mode);
    if (shm_fd >= 0) {
        /* the shm object was created, so initialize it */
        created = 1;
        printf("created shm object %s\n", shm_name);
        if (ftruncate(shm_fd, sizeof(channel_hdr_t)) != 0) {
            printf("ERROR: could not ftruncate %s, errno: %d\n", shm_name, errno);
            close(shm_fd);
            shm_unlink(shm_name);
            return 3;
        }
    } else if (errno == EEXIST) {
        /* Open the existing object. O_CREAT is deliberately omitted: if the
         * object vanished in the meantime we must not silently create an
         * empty, uninitialised one. */
        shm_fd = shm_open(shm_name, O_RDWR, mode);
        if (shm_fd < 0) {
            printf("ERROR: could not create %s, errno: %d\n", shm_name, errno);
            return 1;
        }
    } else {
        printf("ERROR: could not create %s, errno: %d\n", shm_name, errno);
        return 2;
    }

    void *ptr_shm_hdr = mmap(NULL, sizeof(channel_hdr_t), PROT_READ | PROT_WRITE,
                             MAP_SHARED, shm_fd, 0);
    if (ptr_shm_hdr == MAP_FAILED) {
        printf("ERROR: could not mmap %s, errno: %d\n", shm_name, errno);
        close(shm_fd);
        /* Only remove the name if this call created it; another process may
         * be using an object that already existed. */
        if (created) {
            shm_unlink(shm_name);
        }
        return 4;
    }
    channel_hdr_t *shm_hdr = ptr_shm_hdr;

    if (created) {
        int rc;

        /* mutex: shared between processes, robust, priority inheriting */
        pthread_mutexattr_t mutex_attr;
        rc = pthread_mutexattr_init(&mutex_attr);
        if (rc == 0) {
            rc = pthread_mutexattr_setpshared(&mutex_attr, PTHREAD_PROCESS_SHARED);
            if (rc == 0) {
                rc = pthread_mutexattr_setrobust(&mutex_attr, PTHREAD_MUTEX_ROBUST);
            }
            if (rc == 0) {
                rc = pthread_mutexattr_setprotocol(&mutex_attr, PTHREAD_PRIO_INHERIT);
            }
            if (rc == 0) {
                rc = pthread_mutex_init(&shm_hdr->mtx, &mutex_attr);
            }
            pthread_mutexattr_destroy(&mutex_attr);
        }

        /* condition variable: shared between processes */
        if (rc == 0) {
            pthread_condattr_t cond_attr;
            rc = pthread_condattr_init(&cond_attr);
            if (rc == 0) {
                rc = pthread_condattr_setpshared(&cond_attr, PTHREAD_PROCESS_SHARED);
                if (rc == 0) {
                    rc = pthread_cond_init(&shm_hdr->cond, &cond_attr);
                }
                pthread_condattr_destroy(&cond_attr);
            }
            if (rc != 0) {
                pthread_mutex_destroy(&shm_hdr->mtx);
            }
        }

        if (rc != 0) {
            printf("ERROR: could not initialize %s, error: %d\n", shm_name, rc);
            munmap(ptr_shm_hdr, sizeof(channel_hdr_t));
            close(shm_fd);
            shm_unlink(shm_name);
            return 5;
        }
    }

    /* NOTE(review): the predicate is reset on every attach, without holding
     * the mutex, exactly as in the original code. */
    shm_hdr->predicate = 0;
    out->fd = shm_fd;
    out->hdr = shm_hdr;
    return 0;
}
/*
 * Ping-pong test between two processes over two shared-memory channels.
 * argv[1] names the channel this process signals, argv[2] the channel it
 * waits on; a peer started with the names swapped completes the loop.
 */
int main(int argc, char **argv) {
if (argc != 3) {
printUsage();
return 0;
}
char *shm_1_name = argv[1];
char *shm_2_name = argv[2];
channel_t ch_1;
if (init_channel(shm_1_name, &ch_1) != 0) {
return 1;
}
channel_t ch_2;
if (init_channel(shm_2_name, &ch_2) != 0) {
// undo the first channel's mapping before giving up
munmap( ch_1.hdr, sizeof(channel_hdr_t) );
close( ch_1.fd );
return 2;
}
// counter counts iterations; counter2 counts how many "alive" lines were printed
int counter = 0;
int counter2 = 0;
while (TRUE) {
++counter;
// heartbeat once every 100000 iterations
if (counter == 100000) {
printf("alive %d\n", counter2);
++counter2;
counter = 0;
}
// signal the peer through channel 1
int ret = robust_mutex_lock(&ch_1.hdr->mtx);
if (ret != 0) {
// NOTE(review): returns without unmapping or closing either channel
return ret;
}
ch_1.hdr->predicate = 1;
pthread_cond_broadcast (&ch_1.hdr->cond); // deadlock here
pthread_mutex_unlock (&ch_1.hdr->mtx);
// wait for the peer's signal on channel 2
ret = robust_mutex_lock(&ch_2.hdr->mtx);
if (ret != 0) {
return ret;
}
// the loop also stops as soon as pthread_cond_wait() reports an error
while (ch_2.hdr->predicate == 0 && ret == 0)
{
ret = pthread_cond_wait (&ch_2.hdr->cond, &ch_2.hdr->mtx); // deadlock here
}
ch_2.hdr->predicate = 0;
pthread_mutex_unlock (&ch_2.hdr->mtx);
}
// not reached: the loop above only leaves main() through its return statements
munmap( ch_1.hdr, sizeof(channel_hdr_t) );
close( ch_1.fd );
munmap( ch_2.hdr, sizeof(channel_hdr_t) );
close( ch_2.fd );
return 0;
}
To reproduce the deadlock:
run the first instance of program with args: channel1 channel2
run the second instance of program with args: channel2 channel1
interrupt both programs with Ctrl+C
run both programs again
The problem was not present in Ubuntu 16.04.
However, it happens in 18.04.
The backtraces of both programs in deadlock:
First:
#0 0x00007f9802d989f3 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x7f98031cd02c)
at ../sysdeps/unix/sysv/linux/futex-internal.h:88
#1 __pthread_cond_wait_common (abstime=0x0, mutex=0x7f98031cd030, cond=0x7f98031cd000) at pthread_cond_wait.c:502
#2 __pthread_cond_wait (cond=0x7f98031cd000, mutex=0x7f98031cd030) at pthread_cond_wait.c:655
#3 0x00005648bc2af081 in main (argc=<optimized out>, argv=<optimized out>)
at /home/dseredyn/ws_velma/ws_fabric/src/shm_comm/src/test2.c:198
Second:
#0 0x00007f1a3434b724 in futex_wait (private=<optimized out>, expected=3, futex_word=0x7f1a34780010)
at ../sysdeps/unix/sysv/linux/futex-internal.h:61
#1 futex_wait_simple (private=<optimized out>, expected=3, futex_word=0x7f1a34780010)
at ../sysdeps/nptl/futex-internal.h:135
#2 __condvar_quiesce_and_switch_g1 (private=<optimized out>, g1index=<synthetic pointer>, wseq=<optimized out>,
cond=0x7f1a34780000) at pthread_cond_common.c:412
#3 __pthread_cond_broadcast (cond=0x7f1a34780000) at pthread_cond_broadcast.c:73
#4 0x0000557a978b2043 in main (argc=<optimized out>, argv=<optimized out>)
at /home/dseredyn/ws_velma/ws_fabric/src/shm_comm/src/test2.c:185
Related
I am trying to implement a simple producer/consumer code using pthreads. The only common shared data between producer and consumer thread is the count variable used for counting the number of available elements in the shared array. What is happening is the count updated in one thread is not getting reflected in other. How can I make sure the writes to count in one thread appear in other as well? Am I missing something?
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#define ARRAY_SIZE 100
/* Buffer shared by the producer and consumer threads. */
int array[ARRAY_SIZE];
/* Number of elements currently stored; both threads change it while holding mutex. */
volatile int count;
/* Index the producer writes next (wrapped with % ARRAY_SIZE before use). */
int head;
/* Index the consumer reads next (wrapped with % ARRAY_SIZE before use). */
int tail;
/* Signalled by the producer after adding an element; the consumer waits on it. */
pthread_cond_t full = PTHREAD_COND_INITIALIZER;
/* Signalled by the consumer after removing an element; the producer waits on it. */
pthread_cond_t empty = PTHREAD_COND_INITIALIZER;
/* Held by both threads while they touch count, head, tail and array. */
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
/*
 * Producer thread body: forever append the value `head` to the buffer,
 * waiting on `empty` when the buffer holds ARRAY_SIZE elements.
 * `args` is unused. The function never returns normally.
 */
void *producer(void *args)
{
int res = 0;
while (1) {
pthread_mutex_lock(&mutex);
if (count == ARRAY_SIZE) {
printf("\nNo space for new items waiting for consumer to consume");
// NOTE(review): a single wait is not enough; pthread_cond_wait() may
// return without the predicate having changed (spurious wakeup), so
// the check below can legitimately still see a full buffer.
pthread_cond_wait(&empty, &mutex);
// Sometimes, why is count variable still ARRAY_SIZE.
// How do I make sure writes to 'count' variable in
// consumer thread is visible immediately in producer
// thread?
if (count == ARRAY_SIZE) {
printf("\ncount is still ARRAY_SIZE");
exit(0);
}
}
head %= ARRAY_SIZE;
count++;
array[head] = head;
printf("\nproduced %d/%d", head, count);
head++;
pthread_mutex_unlock(&mutex);
// wake the consumer after the mutex has been released
pthread_cond_signal(&full);
}
}
/*
 * Consumer thread body: forever remove the element at `tail` from the
 * buffer, waiting on `full` when the buffer is empty.
 * `args` is unused. The function never returns normally.
 */
void *consumer(void *args)
{
int res = 0;
while (1) {
pthread_mutex_lock(&mutex);
if (count == 0) {
printf("\nNo items available waiting for producer to produce");
// NOTE(review): a single wait is not enough; pthread_cond_wait() may
// return without the predicate having changed (spurious wakeup), so
// the check below can legitimately still see an empty buffer.
pthread_cond_wait(&full, &mutex);
// Sometimes, why is count variable still zero. How do I
// make sure writes to 'count' variable in producer
// thread is visible immediately in consumer thread?
if (count == 0) {
printf("\ncount is still zero");
exit(0);
}
}
tail %= ARRAY_SIZE;
// the value read here is not used afterwards
int ele = array[tail];
count--;
printf("\nconsumed %d/%d", tail, count);
tail++;
pthread_mutex_unlock(&mutex);
// wake the producer after the mutex has been released
pthread_cond_signal(&empty);
}
}
/*
 * Start the producer and consumer threads and wait for both.
 * Returns 0, or the pthread_create() error code if a thread could not be
 * started.
 */
int main()
{
    pthread_t producer_thread;
    pthread_t consumer_thread;
    int ret;

    /* unbuffered stdout so the interleaved messages appear immediately */
    setbuf(stdout, NULL);

    ret = pthread_create(&producer_thread, NULL, producer, NULL);
    if (ret != 0) {
        printf("\nUnable to create producer thread %d", ret);
        return ret;
    }

    ret = pthread_create(&consumer_thread, NULL, consumer, NULL);
    if (ret != 0) {
        printf("\nUnable to create consumer thread %d", ret);
        return ret;
    }

    pthread_join(producer_thread, NULL);
    pthread_join(consumer_thread, NULL);
    return ret;
}
produced 72/99
produced 73/100
No space for new items waiting for consumer to consume
consumed 74/99
consumed 75/98
consumed 76/97
consumed 77/96
produced 74/97
produced 75/98
produced 76/99
produced 77/100
No space for new items waiting for consumer to consume
count is still ARRAY_SIZE <------ incorrect
consumed 21/2
consumed 22/1
consumed 23/0
No items available waiting for producer to produce
produced 24/1
consumed 24/0
No items available waiting for producer to produce
produced 25/1
produced 26/2
produced 27/3
consumed 25/2
consumed 26/1
consumed 27/0
No items available waiting for producer to produce
count is still zero <------ incorrect
Solution that worked after the fix from Zan Lynx
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#define ARRAY_SIZE 100
/* Buffer shared by the producer and consumer threads. */
int array[ARRAY_SIZE];
/* Number of elements currently stored; both threads change it while holding mutex. */
volatile int count;
/* Index the producer writes next (wrapped with % ARRAY_SIZE before use). */
int head;
/* Index the consumer reads next (wrapped with % ARRAY_SIZE before use). */
int tail;
/* Signalled by the producer after adding an element; the consumer waits on it. */
pthread_cond_t full = PTHREAD_COND_INITIALIZER;
/* Signalled by the consumer after removing an element; the producer waits on it. */
pthread_cond_t empty = PTHREAD_COND_INITIALIZER;
/* Held by both threads while they touch count, head, tail and array. */
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
/*
 * Producer thread body: forever append the value `head` to the buffer.
 * When the buffer already holds ARRAY_SIZE elements, the thread waits on
 * `empty` until the consumer has made room.
 *
 * `args` is unused. The loop never terminates; the trailing return only
 * satisfies the thread-function signature.
 */
void *producer(void *args)
{
    (void)args;

    while (1) {
        pthread_mutex_lock(&mutex);
        if (count == ARRAY_SIZE) {
            printf("\nNo space for new items waiting for consumer to consume");
            // Spurious wakeups from the pthread_cond_timedwait() or
            // pthread_cond_wait() functions may occur. Since the
            // return from pthread_cond_timedwait() or
            // pthread_cond_wait() does not imply anything about the
            // value of this predicate, the predicate should be
            // re-evaluated upon such return.
            while (count == ARRAY_SIZE) {
                pthread_cond_wait(&empty, &mutex);
            }
        }
        head %= ARRAY_SIZE;
        count++;
        array[head] = head;
        printf("\nproduced %d/%d", head, count);
        head++;
        pthread_mutex_unlock(&mutex);
        // wake the consumer once the mutex has been released
        pthread_cond_signal(&full);
    }
    return NULL;
}
/*
 * Consumer thread body: forever remove the element at `tail` from the
 * buffer. When the buffer is empty, the thread waits on `full` until the
 * producer has added something.
 *
 * `args` is unused. The loop never terminates; the trailing return only
 * satisfies the thread-function signature.
 */
void *consumer(void *args)
{
    (void)args;

    while (1) {
        pthread_mutex_lock(&mutex);
        if (count == 0) {
            printf("\nNo items available waiting for producer to produce");
            // Spurious wakeups from the pthread_cond_timedwait() or
            // pthread_cond_wait() functions may occur. Since the
            // return from pthread_cond_timedwait() or
            // pthread_cond_wait() does not imply anything about the
            // value of this predicate, the predicate should be
            // re-evaluated upon such return.
            while (count == 0) {
                pthread_cond_wait(&full, &mutex);
            }
        }
        tail %= ARRAY_SIZE;
        // the consumed value is read but this example does nothing with it
        int item = array[tail];
        (void)item;
        count--;
        printf("\nconsumed %d/%d", tail, count);
        tail++;
        pthread_mutex_unlock(&mutex);
        // wake the producer once the mutex has been released
        pthread_cond_signal(&empty);
    }
    return NULL;
}
/*
 * Start the producer and consumer threads and wait for both.
 * Returns 0, or the pthread_create() error code if a thread could not be
 * started.
 */
int main()
{
    pthread_t producer_thread;
    pthread_t consumer_thread;
    int ret;

    /* unbuffered stdout so the interleaved messages appear immediately */
    setbuf(stdout, NULL);

    ret = pthread_create(&producer_thread, NULL, producer, NULL);
    if (ret != 0) {
        printf("\nUnable to create producer thread %d", ret);
        return ret;
    }

    ret = pthread_create(&consumer_thread, NULL, consumer, NULL);
    if (ret != 0) {
        printf("\nUnable to create consumer thread %d", ret);
        return ret;
    }

    pthread_join(producer_thread, NULL);
    pthread_join(consumer_thread, NULL);
    return ret;
}
I believe you missed the fact that condition waits must always check the predicate again after the wait returns. There must be a loop.
The wait may end for all sorts of reasons besides a signal/notify call.
I'm using popen to execute a command and read the output. I'm setting the file descriptor to non-blocking mode so that I can put in my own timeout, as follows:
// Run cmd through popen() and read its standard output via a stdio stream.
// NOTE(review): the popen() result is not checked before fileno() is called on it.
auto stream = popen(cmd.c_str(), "r");
int fd = fileno(stream);
// Switch the descriptor underneath the stream to non-blocking mode.
int flags = fcntl(fd, F_GETFL, 0);
flags |= O_NONBLOCK;
fcntl(fd, F_SETFL, flags);
// Poll for a line, then sleep; the stream is examined once per 10-second sleep.
while(!feof(stream)) {
if(fgets(buffer, MAX_BUF, stream) != NULL) {
// do something with buffer...
}
sleep(10);
}
pclose(stream);
This works just fine, except that fgets keeps returning NULL, until the program has finished executing, at which time it returns all the output as expected.
In other words, even if the program immediately outputs some text and a newline to the stdout, my loop doesn't read it immediately; it only sees it later.
In the documentation for popen I see:
Note that output popen() streams are block buffered by default.
I've tried a few things to turn off buffering (ex. setvbuf(stream, NULL, _IONBF, 0)) , but so far no luck.
How do I turn off buffering so that I can read the output in real-time?
Thank you!
A solution based on something like select() would be more accurate and flexible. Try this :
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <sys/select.h>
/*
 * Run `cmd` with popen() and copy its standard output to stdout as soon as
 * data arrives.
 *
 * The pipe's descriptor is read directly with read() after select() reports
 * it readable, so stdio buffering of the pipe never delays the output. When
 * nothing arrives for 10 seconds "Timeout" is printed and the wait resumes.
 * The function returns when the command closes its output or when an
 * unrecoverable error occurs; the stream is always closed with pclose().
 */
void read_cmd(const char *cmd)
{
    FILE *stream;
    int fd;
    char buffer[1024];
    int eof = 0;

    stream = popen(cmd, "r");
    if (stream == NULL) {
        fprintf(stderr, "popen(): error '%m' (%d)\n", errno);
        return;
    }
    fd = fileno(stream);

    while (!eof) {
        struct timeval timeout;
        fd_set fdset;
        int rc;

        /* select() may modify both arguments, so rebuild them every round */
        timeout.tv_sec = 10; // 10 seconds
        timeout.tv_usec = 0;
        FD_ZERO(&fdset);
        FD_SET(fd, &fdset);

        rc = select(fd + 1, &fdset, 0, 0, &timeout);
        if (rc < 0) {
            if (errno == EINTR) {
                /* interrupted by a signal: simply wait again */
                continue;
            }
            fprintf(stderr, "select(): error '%m' (%d)\n", errno);
            break;
        }
        if (rc == 0) {
            // Timeout
            printf("Timeout\n");
            continue;
        }

        // Something to read
        rc = (int)read(fd, buffer, sizeof(buffer) - 1);
        if (rc > 0) {
            buffer[rc] = '\0';
            printf("%s", buffer);
            fflush(stdout);
        } else if (rc < 0) {
            if (errno == EINTR) {
                continue;
            }
            fprintf(stderr, "read(): error '%m' (%d)\n", errno);
            eof = 1;
        } else {
            // End of file
            eof = 1;
        }
    }

    pclose(stream);
}
/*
 * Entry point: run the command given as the first argument and stream its
 * output. Returns 1 when no command was supplied, 0 otherwise.
 */
int main(int ac, char *av[])
{
    if (ac < 2) {
        /* without this check av[1] is NULL and would be handed to popen() */
        fprintf(stderr, "usage: %s command\n", ac > 0 ? av[0] : "read_cmd");
        return 1;
    }
    read_cmd(av[1]);
    return 0;
} // main
What would be your suggestion in order to create a single instance application, so that only one process is allowed to run at a time? File lock, mutex or what?
A good way is:
#include <sys/file.h>
#include <errno.h>
// Open (creating it if needed) the file whose lock marks "an instance is running".
// NOTE(review): the open() result is not checked; if it fails, flock() below
// fails too and errno will not be EWOULDBLOCK.
int pid_file = open("/var/run/whatever.pid", O_CREAT | O_RDWR, 0666);
// Try to take an exclusive lock without blocking.
int rc = flock(pid_file, LOCK_EX | LOCK_NB);
if(rc) {
// The lock could not be taken; EWOULDBLOCK means somebody else holds it.
if(EWOULDBLOCK == errno)
; // another instance is running
}
else {
// this is the first instance
// The descriptor must stay open for as long as the lock should be held.
}
Note that locking allows you to ignore stale pid files (i.e. you don't have to delete them). When the application terminates for any reason the OS releases the file lock for you.
Pid files are not terribly useful because they can be stale (the file exists but the process does not). Hence, the application executable itself can be locked instead of creating and locking a pid file.
A more advanced method is to create and bind a unix domain socket using a predefined socket name. Bind succeeds for the first instance of your application. Again, the OS unbinds the socket when the application terminates for any reason. When bind() fails another instance of the application can connect() and use this socket to pass its command line arguments to the first instance.
Here is a solution in C++. It uses the socket recommendation of Maxim. I like this solution better than the file based locking solution, because the file based one fails if the process crashes and does not delete the lock file. Another user will not be able to delete the file and lock it. The sockets are automatically deleted when the process exits.
Usage:
// Usage sketch: construct the guard, then call it to try to claim the port.
int main()
{
SingletonProcess singleton(5555); // pick a port number to use that is specific to this app
// operator() returns true only when this process managed to bind the port
if (!singleton())
{
cerr << "process running already. See " << singleton.GetLockFileName() << endl;
return 1;
}
// placeholder for the application body; `singleton` must stay alive while it runs
... rest of the app
}
Code:
#include <netinet/in.h>
// Single-instance guard based on binding a UDP port.
//
// Only one socket can be bound to a given UDP port at a time, so the first
// process that binds `port` is the single instance; every later process sees
// bind() fail. The socket is closed by the destructor (and by the OS when
// the process ends), which releases the port again.
//
// NOTE(review): the class is copyable; a copy would close the same
// descriptor a second time. Do not copy instances.
class SingletonProcess
{
public:
    // port0: UDP port number that identifies the application.
    SingletonProcess(uint16_t port0)
        : socket_fd(-1)
        , rc(1)
        , port(port0)
    {
    }

    ~SingletonProcess()
    {
        if (socket_fd != -1)
        {
            ::close(socket_fd);
        }
    }

    // Try to claim the port. Returns true when this object holds the port,
    // false when bind() failed (typically: another instance holds it).
    // May be called again after a failure to retry.
    // Throws std::runtime_error when no socket can be created.
    bool operator()()
    {
        if (socket_fd == -1 || rc)
        {
            // A previous attempt may have left a socket that failed to bind;
            // close it so that retries do not leak descriptors.
            if (socket_fd != -1)
            {
                ::close(socket_fd);
            }
            socket_fd = -1;
            rc = 1;

            if ((socket_fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
            {
                socket_fd = -1;
                throw std::runtime_error(std::string("Could not create socket: ") + strerror(errno));
            }

            // value-initialise so the padding bytes (sin_zero) are zero
            struct sockaddr_in name = {};
            name.sin_family = AF_INET;
            name.sin_port = htons (port);
            name.sin_addr.s_addr = htonl (INADDR_ANY);
            rc = bind (socket_fd, (struct sockaddr *) &name, sizeof (name));
        }
        return (socket_fd != -1 && rc == 0);
    }

    // Human-readable description of what is being locked, e.g. "port 5555".
    std::string GetLockFileName()
    {
        return "port " + std::to_string(port);
    }

private:
    int socket_fd = -1;   // bound (or last attempted) socket, -1 when none
    int rc;               // result of the last bind(); 0 means the port is ours
    uint16_t port;        // UDP port used as the lock
};
For windows, a named kernel object (e.g. CreateEvent, CreateMutex). For unix, a pid-file - create a file and write your process ID to it.
You can create an "anonymous namespace" AF_UNIX socket. This is completely Linux-specific, but has the advantage that no filesystem actually has to exist.
Read the man page for unix(7) for more info.
Avoid file-based locking
It is always good to avoid a file based locking mechanism to implement the singleton instance of an application. The user can always rename the lock file to a different name and run the application again as follows:
mv lockfile.pid lockfile1.pid
Where lockfile.pid is the lock file based on which is checked for existence before running the application.
So, it is always preferable to use a locking scheme on object directly visible to only the kernel. So, anything which has to do with a file system is not reliable.
So the best option would be to bind to a inet socket. Note that unix domain sockets reside in the filesystem and are not reliable.
Alternatively, you can also do it using DBUS.
It seems this has not been mentioned yet: it is possible to create a mutex in shared memory, but it needs to be marked as shared through its attributes (not tested):
/* Attributes that mark the mutex as usable from several processes. */
pthread_mutexattr_t attr;
pthread_mutexattr_init(&attr);
pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
/* The mutex object itself lives in the attached shared-memory segment. */
/* NOTE(review): the shmat() result is used without checking for failure. */
pthread_mutex_t *mutex = shmat(SHARED_MEMORY_ID, NULL, 0);
/* NOTE(review): every process running this re-initialises the mutex; */
/* presumably only the process that creates the segment should do so - confirm. */
pthread_mutex_init(mutex, &attr);
There are also shared memory semaphores (but I failed to find out how to lock one):
/* Look up a System V semaphore set of one semaphore by key; flags 0 means it is not created here. */
/* System V semaphores are acquired and released with semop() on the returned id. */
int sem_id = semget(SHARED_MEMORY_KEY, 1, 0);
No one has mentioned it, but sem_open() creates a real named semaphore under modern POSIX-compliant OSes. If you give a semaphore an initial value of 1, it becomes a mutex (as long as it is strictly released only if a lock was successfully obtained).
With several sem_open()-based objects, you can create all of the common equivalent Windows named objects - named mutexes, named semaphores, and named events. Named events with "manual" set to true is a bit more difficult to emulate (it requires four semaphore objects to properly emulate CreateEvent(), SetEvent(), and ResetEvent()). Anyway, I digress.
Alternatively, there is named shared memory. You can initialize a pthread mutex with the "shared process" attribute in named shared memory and then all processes can safely access that mutex object after opening a handle to the shared memory with shm_open()/mmap(). sem_open() is easier if it is available for your platform (if it isn't, it should be for sanity's sake).
Regardless of the method you use, to test for a single instance of your application, use the trylock() variant of the wait function (e.g. sem_trywait()). If the process is the only one running, it will successfully lock the mutex. If it isn't, it will fail immediately.
Don't forget to unlock and close the mutex on application exit.
It will depend on which problem you want to avoid by forcing your application to have only one instance and the scope on which you consider instances.
For a daemon — the usual way is to have a /var/run/app.pid file.
For a user application, I've had more problems with applications that prevented me from running them twice than with being able to run twice an application that shouldn't have been. So the answer to "why, and within which scope" is very important, and the right solution will probably be specific to that reason and the intended scope.
Here is a solution based on sem_open
/*
*compile with :
*gcc single.c -o single -pthread
*/
/*
* run multiple instance on 'single', and check the behavior
*/
#include <stdio.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <semaphore.h>
#include <unistd.h>
#include <errno.h>
#define SEM_NAME "/mysem_911"
/*
 * Single-instance demo based on a named POSIX semaphore.
 *
 * The semaphore is created with an initial value of 1. The process whose
 * sem_trywait() succeeds is the single instance; any other process started
 * while it is running finds the value at 0 and reports "Lock not obtained".
 *
 * Returns 1 when the semaphore cannot be opened, 0 otherwise.
 */
int main()
{
    sem_t *sem;
    int rc;

    sem = sem_open(SEM_NAME, O_CREAT, S_IRWXU, 1);
    if (sem == SEM_FAILED) {
        printf("sem_open: failed errno:%d\n", errno);
        /* SEM_FAILED must never be passed on to sem_trywait() */
        return 1;
    }

    rc = sem_trywait(sem);
    if (rc == 0) {
        printf("Obtained lock !!!\n");
        sleep(10);
        /* Release by removing the name rather than posting: the next
         * instance then creates a fresh semaphore with value 1. */
        sem_unlink(SEM_NAME);
    } else {
        printf("Lock not obtained\n");
    }

    sem_close(sem);
    return 0;
}
One of the comments on a different answer says "I found sem_open() rather lacking". I am not sure about the specifics of what's lacking
Based on the hints in maxim's answer, here is my POSIX solution of a dual-role daemon (i.e. a single application that can act as a daemon and as a client communicating with that daemon). This scheme has the advantage of providing an elegant solution to the problem where the instance started first should be the daemon and all following executions should just offload their work to that daemon. It is a complete example but lacks a lot of stuff a real daemon should do (e.g. using syslog for logging and fork to put itself into the background correctly, dropping privileges etc.), but it is already quite long and is fully working as is. I have only tested this on Linux so far but IIRC it should be all POSIX-compatible.
In the example the clients can send integers passed to them as first command line argument and parsed by atoi via the socket to the daemon which prints it to stdout. With this kind of sockets it is also possible to transfer arrays, structs and even file descriptors (see man 7 unix).
#include <stdio.h>
#include <stddef.h>
#include <stdbool.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <signal.h>
#include <sys/socket.h>
#include <sys/un.h>
#define SOCKET_NAME "/tmp/exampled"
/* Socket bound (daemon) or connected (client) by singleton_connect(); -1 when none. */
static int socket_fd = -1;
/* Set once this process has successfully bound the socket name. */
static bool isdaemon = false;
/* Daemon loop flag. It is cleared from a signal handler, so it must be a
 * volatile sig_atomic_t for the store to be well defined and visible. */
static volatile sig_atomic_t run = 1;
/*
 * Become the daemon for the UNIX datagram socket `name`, or connect to the
 * daemon that already owns it.
 *
 * First tries to bind `name`. If the name is already in use, tries to
 * connect to it instead; a refused connection is taken to mean that the
 * previous daemon died, so the stale socket file is removed and the bind is
 * retried once. On success the socket is stored in socket_fd.
 *
 * returns
 *  -1 on errors (including a `name` too long for sun_path)
 *   0 on successful server bindings
 *   1 on successful client connects
 */
int singleton_connect(const char *name) {
    int len, tmpd;
    struct sockaddr_un addr = {0};

    /* sun_path is a fixed-size array; refuse names that would overflow it */
    if (strlen(name) >= sizeof(addr.sun_path)) {
        printf("Socket name '%s' is too long.\n", name);
        return -1;
    }

    if ((tmpd = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0) {
        printf("Could not create socket: '%s'.\n", strerror(errno));
        return -1;
    }

    /* fill in socket address structure */
    addr.sun_family = AF_UNIX;
    strcpy(addr.sun_path, name);
    len = offsetof(struct sockaddr_un, sun_path) + strlen(name);

    int ret;
    unsigned int retries = 1;
    do {
        /* bind the name to the descriptor */
        ret = bind(tmpd, (struct sockaddr *)&addr, len);
        /* if this succeeds there was no daemon before */
        if (ret == 0) {
            socket_fd = tmpd;
            isdaemon = true;
            return 0;
        } else {
            if (errno == EADDRINUSE) {
                ret = connect(tmpd, (struct sockaddr *) &addr, sizeof(struct sockaddr_un));
                if (ret != 0) {
                    if (errno == ECONNREFUSED) {
                        printf("Could not connect to socket - assuming daemon died.\n");
                        /* remove the stale socket file so the retry can bind */
                        unlink(name);
                        continue;
                    }
                    printf("Could not connect to socket: '%s'.\n", strerror(errno));
                    continue;
                }
                printf("Daemon is already running.\n");
                socket_fd = tmpd;
                return 1;
            }
            printf("Could not bind to socket: '%s'.\n", strerror(errno));
            continue;
        }
    } while (retries-- > 0);

    printf("Could neither connect to an existing daemon nor become one.\n");
    close(tmpd);
    return -1;
}
/*
 * Release the singleton socket. The daemon additionally removes the socket
 * file so that the next start can bind the name again. Does nothing when no
 * socket is open, so it is safe to call more than once.
 */
static void cleanup(void) {
    if (socket_fd < 0)
        return;

    if (isdaemon && unlink(SOCKET_NAME) < 0)
        printf("Could not remove socket file.\n");

    /* both roles own a descriptor that has to be closed */
    close(socket_fd);
    socket_fd = -1;
}
/* Signal handler: clear the flag that keeps the daemon loop in main() running. */
static void handler(int sig) {
    (void)sig;
    run = false;
}
int main(int argc, char **argv) {
switch (singleton_connect(SOCKET_NAME)) {
case 0: { /* Daemon */
struct sigaction sa;
sa.sa_handler = &handler;
sigemptyset(&sa.sa_mask);
if (sigaction(SIGINT, &sa, NULL) != 0 || sigaction(SIGQUIT, &sa, NULL) != 0 || sigaction(SIGTERM, &sa, NULL) != 0) {
printf("Could not set up signal handlers!\n");
cleanup();
return EXIT_FAILURE;
}
struct msghdr msg = {0};
struct iovec iovec;
int client_arg;
iovec.iov_base = &client_arg;
iovec.iov_len = sizeof(client_arg);
msg.msg_iov = &iovec;
msg.msg_iovlen = 1;
while (run) {
int ret = recvmsg(socket_fd, &msg, MSG_DONTWAIT);
if (ret != sizeof(client_arg)) {
if (errno != EAGAIN && errno != EWOULDBLOCK) {
printf("Error while accessing socket: %s\n", strerror(errno));
exit(1);
}
printf("No further client_args in socket.\n");
} else {
printf("received client_arg=%d\n", client_arg);
}
/* do daemon stuff */
sleep(1);
}
printf("Dropped out of daemon loop. Shutting down.\n");
cleanup();
return EXIT_FAILURE;
}
case 1: { /* Client */
if (argc < 2) {
printf("Usage: %s <int>\n", argv[0]);
return EXIT_FAILURE;
}
struct iovec iovec;
struct msghdr msg = {0};
int client_arg = atoi(argv[1]);
iovec.iov_base = &client_arg;
iovec.iov_len = sizeof(client_arg);
msg.msg_iov = &iovec;
msg.msg_iovlen = 1;
int ret = sendmsg(socket_fd, &msg, 0);
if (ret != sizeof(client_arg)) {
if (ret < 0)
printf("Could not send device address to daemon: '%s'!\n", strerror(errno));
else
printf("Could not send device address to daemon completely!\n");
cleanup();
return EXIT_FAILURE;
}
printf("Sent client_arg (%d) to daemon.\n", client_arg);
break;
}
default:
cleanup();
return EXIT_FAILURE;
}
cleanup();
return EXIT_SUCCESS;
}
All credits go to Mark Lakata. I merely did some very minor touch up only.
main.cpp
#include "singleton.hpp"
#include <iostream>
using namespace std;
int main()
{
SingletonProcess singleton(5555); // pick a port number to use that is specific to this app
if (!singleton())
{
cerr << "process running already. See " << singleton.GetLockFileName() << endl;
return 1;
}
// ... rest of the app
}
singleton.hpp
#include <netinet/in.h>
#include <unistd.h>
#include <cerrno>
#include <string>
#include <cstring>
#include <stdexcept>
using namespace std;
// Single-instance guard based on binding a UDP port.
//
// Only one socket can be bound to a given UDP port at a time, so the first
// process that binds `port` is the single instance; every later process sees
// bind() fail. The socket is closed by the destructor (and by the OS when
// the process ends), which releases the port again.
//
// NOTE(review): the class is copyable; a copy would close the same
// descriptor a second time. Do not copy instances.
class SingletonProcess
{
public:
    // port0: UDP port number that identifies the application.
    SingletonProcess(uint16_t port0)
        : socket_fd(-1)
        , rc(1)
        , port(port0)
    {
    }

    ~SingletonProcess()
    {
        if (socket_fd != -1)
        {
            ::close(socket_fd);
        }
    }

    // Try to claim the port. Returns true when this object holds the port,
    // false when bind() failed (typically: another instance holds it).
    // May be called again after a failure to retry.
    // Throws std::runtime_error when no socket can be created.
    bool operator()()
    {
        if (socket_fd == -1 || rc)
        {
            // A previous attempt may have left a socket that failed to bind;
            // close it so that retries do not leak descriptors.
            if (socket_fd != -1)
            {
                ::close(socket_fd);
            }
            socket_fd = -1;
            rc = 1;

            if ((socket_fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
            {
                socket_fd = -1;
                throw std::runtime_error(std::string("Could not create socket: ") + strerror(errno));
            }

            // value-initialise so the padding bytes (sin_zero) are zero
            struct sockaddr_in name = {};
            name.sin_family = AF_INET;
            name.sin_port = htons (port);
            name.sin_addr.s_addr = htonl (INADDR_ANY);
            rc = bind (socket_fd, (struct sockaddr *) &name, sizeof (name));
        }
        return (socket_fd != -1 && rc == 0);
    }

    // Human-readable description of what is being locked, e.g. "port 5555".
    std::string GetLockFileName()
    {
        return "port " + std::to_string(port);
    }

private:
    int socket_fd = -1;   // bound (or last attempted) socket, -1 when none
    int rc;               // result of the last bind(); 0 means the port is ours
    uint16_t port;        // UDP port used as the lock
};
#include <windows.h>
int main(int argc, char *argv[])
{
// ensure only one running instance
HANDLE hMutexH`enter code here`andle = CreateMutex(NULL, TRUE, L"my.mutex.name");
if (GetLastError() == ERROR_ALREADY_EXISTS)
{
return 0;
}
// rest of the program
ReleaseMutex(hMutexHandle);
CloseHandle(hMutexHandle);
return 0;
}
FROM: HERE
On Windows you could also create a shared data segment and use an interlocked function to test for the first occurrence, e.g.
#include <Windows.h>
#include <stdio.h>
#include <conio.h>
// Put the variable defined next into a data section named "Shared".
#pragma data_seg("Shared")
// Flag tested by main(); 0 until the first process exchanges it to 1.
volatile LONG lock = 0;
// Return to the default data section.
#pragma data_seg()
// Linker directive giving section "Shared" the attributes R, W and S;
// presumably S (shared) is what makes all processes see the same `lock` - confirm.
#pragma comment(linker, "/SECTION:Shared,RWS")
/*
 * Print "first" in the process that flips `lock` from 0 to 1 and "other" in
 * every process that finds it already set, then wait for a key press.
 */
int main(void)
{
    /* InterlockedExchange stores 1 and returns the previous value atomically */
    if (InterlockedExchange(&lock, 1) == 0) {
        printf("first\n");
    } else {
        printf("other\n");
    }
    getch();
    return 0;
}
I have just written one, and tested.
#define PID_FILE "/tmp/pidfile"
/*
 * Create the marker file that tells later instances one is already running.
 *
 * O_EXCL makes the call fail, and leave the file untouched, when the file
 * already exists. The file is created read-only (0444) instead of with
 * mode 0, because an unreadable file could not be opened by the O_RDONLY
 * existence probe of a later, unprivileged instance.
 */
static void create_pidfile(void) {
    int fd = open(PID_FILE, O_RDWR | O_CREAT | O_EXCL, 0444);
    if (fd >= 0) {
        close(fd);
    }
}
/*
 * Exit immediately when the marker file exists, otherwise create it and
 * carry on as the only instance.
 *
 * NOTE(review): checking and creating are two separate steps, so two
 * processes started at the same moment can both pass the check; the marker
 * file is also never removed here.
 */
int main(void) {
    int fd = open(PID_FILE, O_RDONLY);
    /* 0 is a valid descriptor (e.g. when stdin is closed), so test >= 0 */
    if (fd >= 0) {
        close(fd);
        return 0;
    }
    // make sure only one instance is running
    create_pidfile();
    return 0;
}
Just run this code on a separate thread:
// Endlessly rewrite the marker file so that it always contains "locked".
// Never returns.
void lock() {
    for (;;) {
        std::ofstream marker("myapplock.locker", std::ios::trunc);
        marker << "locked";
        marker.close();
    }
}
Run this as your main code:
// Run the application body only when the marker file does not start with
// the word "locked".
int main() {
    std::ifstream marker("myapplock.locker");
    std::string first_word;
    marker >> first_word;

    const bool already_locked = (first_word == "locked");
    if (!already_locked) {
        //your code
    }
    return 0;
}
I've written a program which spawns a thread that reads in a loop from stdin in a blocking fashion. I want to make the thread return from the blocked read immediately. I've registered my signal handler (with sigaction and without the SA_RESTART flag) in the reading thread, sent it a signal, and expected read to exit with an EINTR error. But it doesn't happen. Is it an issue or limitation of Cygwin, or am I doing something wrong?
Here is the code:
#include <stdio.h>
#include <errno.h>
#include <pthread.h>
/* Handle of the reader thread created in main(). */
pthread_t thread;
/* Loop flag for thr_func(): set to 1 by main(), cleared by root_handler(). */
volatile int run = 0;
/*
 * SIGUSR1 handler: report which thread received the signal and clear `run`
 * so that the reader loop stops.
 *
 * NOTE(review): printf() is not async-signal-safe; it is kept because the
 * message is the diagnostic this test program exists to show.
 */
void root_handler(int signum)
{
    (void)signum;
    /* pthread_t is not an unsigned int, so "%x" was a format mismatch */
    printf("%s ENTER (thread is %lx)\n", __func__, (unsigned long)pthread_self());
    run = 0;
}
/*
 * Reader thread: install the SIGUSR1 handler (without SA_RESTART, so a
 * blocked read() is interrupted instead of restarted), then echo whatever
 * arrives on stdin until `run` is cleared, stdin reaches end of file, or
 * read() fails for a reason other than EINTR.
 *
 * `arg` is unused. Always returns NULL.
 */
void* thr_func(void*arg)
{
    int res;
    char buffer[256];

    (void)arg;
    printf("%s ENTER (thread is %lx)\n", __func__, (unsigned long)pthread_self());

    struct sigaction act;
    memset (&act, 0, sizeof(act));
    /* root_handler takes a single int, so it belongs in sa_handler;
     * sa_sigaction is for three-argument SA_SIGINFO handlers */
    act.sa_handler = &root_handler;
    //act.sa_flags = SA_RESTART;
    if (sigaction(SIGUSR1, &act, NULL) < 0) {
        perror ("sigaction error");
        return NULL;
    }

    while(run)
    {
        /* leave room for the terminator added below */
        res = (int)read(0, buffer, sizeof(buffer) - 1);
        if(res == -1)
        {
            if(errno == EINTR)
            {
                puts("read was interrupted by signal");
            }
            else
            {
                perror("read error");
                break;
            }
        }
        else if(res == 0)
        {
            /* end of input: nothing more will ever arrive */
            break;
        }
        else
        {
            /* read() does not terminate the data; "%s" needs a string */
            buffer[res] = '\0';
            printf("got: %s", buffer);
        }
    }
    printf("%s LEAVE (thread is %lx)\n", __func__, (unsigned long)pthread_self());
    return NULL;
}
/*
 * Start the reader thread, give it four seconds to block in read(), then
 * send it SIGUSR1 and wait for it to finish.
 * Returns 1 when the thread cannot be created, 0 otherwise.
 */
int main() {
    run = 1;
    /* pthread_t is not an unsigned int, so "%x" was a format mismatch */
    printf("root thread: %lx\n", (unsigned long)pthread_self());

    int rc = pthread_create(&thread, NULL, &thr_func, NULL);
    if (rc != 0) {
        /* `thread` is not valid here and must not be signalled or joined */
        printf("pthread_create failed: %d\n", rc);
        return 1;
    }
    printf("thread %lx started\n", (unsigned long)thread);

    sleep(4);
    pthread_kill(thread, SIGUSR1 );
    //raise(SIGUSR1);
    pthread_join(thread, NULL);
    return 0;
}
I'm using Cygwin (1.7.32(0.274/5/3)).
I've just tried to do the same on Ubuntu and it works (I needed to include signal.h, though, even though in Cygwin it compiled as it is). It seems to be peculiarity of Cygwin's implementation.
My problem deals with a segmentation fault that I get when I run this program on a linux machine versus my own mac computer. This program runs how I believe it should on my own mac computer, yet when I try to run it on my school's linux computers, I get a segmentation fault that doesn't appear on my mac computer. I'll give a brief background on the assignment and then go over the problem in more detail.
So I have this program which basically simulates baboons crossing a ravine with a single rope. Only one baboon can cross at a time and there are certain restraints on the number of baboons that can cross at a time, as well as how many baboons can cross from one direction before baboons from the other direction are allowed to cross. The implementation of the code.
I have searched for segmentation fault questions already here on stackoverflow, yet most of them deal with multiple processes whereas I am merely using different threads. The segmentation fault ends up coming from waiting on a semaphore that doesn't exist, yet when I checked to see whether it was initialized, it was successfully initialized. Again, this program works on my Mac but then doesn't work when I try to run it on the Linux machines. Any help at all in understanding why it can't run on the Linux machines but can run on the Mac would be appreciated. If any more information is needed, I would be happy to provide it. I did error check at one point but that code was deleted off the school computers. My error checking, as far as I remember, didn't show any errors.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/time.h>
#include <time.h>
#include <pthread.h>
#include <semaphore.h>
#include <fcntl.h>
#include <sys/stat.h> //for mode flags, if needed for future use
// Number of baboon threads created for each crossing direction; also the
// sizes of the per-direction thread and threadInfo arrays.
#define ATOB_COUNT 20
#define BTOA_COUNT 20
// Fixed seed handed to srandom() in main.
#define RANDOM_SEED 2123
//semaphore names
// Names passed to sem_open() for the three named semaphores.
#define MUTEX_SEM "/mutex"
#define TOB_SEM "/toB"
#define TOA_SEM "/toA"
//define methods here if needed
// Thread entry points; each receives a pointer to a struct threadInfo.
void *toAThread(void *threadId);
void *toBThread(void *threadId);
// Sleeps for a random number of milliseconds between 1 and limit.
void my_sleep(int limit);
// Unlinks and re-creates the named semaphore, exiting on failure.
void sem_open_errorCheck(char *name, unsigned int startingValue, sem_t *result);
//defining semaphores and shared variables
// mutex guards the shared counters below; toB and toA are the semaphores
// that waiting AtoB and BtoA baboons block on, respectively.
sem_t *mutex, *toB, *toA;
// Baboons currently on the rope: incremented on getting on, decremented on
// getting off.
int xingCount = 0;
// Baboons that have got off the rope; reset to 0 when the direction changes
// or the rope becomes idle.
int xedCount = 0;
// Number of threads that have announced they are about to wait on toB / toA.
int toBWaitCount = 0;
int toAWaitCount = 0;
// Direction currently assigned to the rope; none means no direction is set.
enum xingDirectionTypes {
none,
aToB,
bToA
};
enum xingDirectionTypes xingDirection = none;
// Each thread appends "A " or "B " to this buffer; main prints it at the end.
char orderLeaving[100];
// Per-thread argument: the numeric id the thread uses in its log messages.
struct threadInfo {
int threadId;
};
// Argument storage for the threads; file scope so it outlives thread start-up.
struct threadInfo atobIDs[ATOB_COUNT];
struct threadInfo btoaIDs[BTOA_COUNT];
/*
 * Creates a fresh named semaphore and returns its handle.
 *
 * Any semaphore already registered under `name` is unlinked first. The
 * handle is returned (not written through a by-value pointer parameter) so
 * the caller can store it in the global it belongs to. Exits the program if
 * the semaphore cannot be created.
 */
static sem_t *open_fresh_semaphore(const char *name, unsigned int startingValue) {
    sem_unlink(name);
    sem_t *sem = sem_open(name, O_CREAT, 0600, startingValue);
    // sem_open signals failure with SEM_FAILED, which is not -1 everywhere.
    if (sem == SEM_FAILED) {
        perror("sem_open error: semaphore failed to open correctly");
        exit(EXIT_FAILURE);
    }
    return sem;
}

/*
 * Program entry point: opens the three semaphores, starts ATOB_COUNT threads
 * running toBThread and BTOA_COUNT threads running toAThread, waits for all
 * of them, prints the recorded order and releases the semaphores.
 */
int main(void) {
    pthread_t atobPTHREADS[ATOB_COUNT];
    pthread_t btoaPTHREADS[BTOA_COUNT];
    pthread_attr_t attr;
    int counter;
    int result;

    srandom(RANDOM_SEED);

    // The handles must be stored in the globals the worker threads use;
    // without this the threads call sem_wait() on a NULL pointer.
    mutex = open_fresh_semaphore(MUTEX_SEM, 1);
    toA = open_fresh_semaphore(TOA_SEM, 0);
    toB = open_fresh_semaphore(TOB_SEM, 0);

    //Creating a set of attributes to send to the threads
    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

    //spawn toB baboons
    for (counter = 0; counter < ATOB_COUNT; counter++) {
        atobIDs[counter].threadId = counter;
        // pthread functions return an error number and do not set errno,
        // so the result is compared against 0 and decoded with strerror.
        result = pthread_create(&atobPTHREADS[counter], &attr, toBThread, &atobIDs[counter]);
        if (result != 0) {
            fprintf(stderr, "Thread Creation Error: atob baboon: %s\n", strerror(result));
            exit(EXIT_FAILURE);
        }
    }

    //spawn toA baboons
    for (counter = 0; counter < BTOA_COUNT; counter++) {
        // ids continue after the AtoB ids so every thread has a unique one
        btoaIDs[counter].threadId = counter + ATOB_COUNT;
        result = pthread_create(&btoaPTHREADS[counter], &attr, toAThread, &btoaIDs[counter]);
        if (result != 0) {
            fprintf(stderr, "Thread Creation Error: btoa baboon: %s\n", strerror(result));
            exit(EXIT_FAILURE);
        }
    }
    pthread_attr_destroy(&attr);

    //Wait for all the threads to finish
    for (counter = 0; counter < ATOB_COUNT; counter++) {
        result = pthread_join(atobPTHREADS[counter], NULL);
        if (result != 0) {
            fprintf(stderr, "Thread Join: AtoB: %s\n", strerror(result));
            exit(EXIT_FAILURE);
        }
    }
    for (counter = 0; counter < BTOA_COUNT; counter++) {
        result = pthread_join(btoaPTHREADS[counter], NULL);
        if (result != 0) {
            fprintf(stderr, "Thread Join: BtoA: %s\n", strerror(result));
            exit(EXIT_FAILURE);
        }
    }

    printf("The order leaving %s", orderLeaving);

    // Release the handles and remove the names so nothing outlives the run.
    sem_close(mutex);
    sem_close(toA);
    sem_close(toB);
    sem_unlink(MUTEX_SEM);
    sem_unlink(TOA_SEM);
    sem_unlink(TOB_SEM);

    exit(EXIT_SUCCESS);
}
/*
 * Thread body for a baboon crossing from side A to side B.
 *
 * threadId points to a struct threadInfo holding the id used in log output.
 * The thread idles for a random time, gets on the rope (or waits on toB
 * until a departing baboon lets it on), then gets off and decides who may
 * go next. Always returns NULL.
 */
void *toBThread(void *threadId) {
    struct threadInfo *info = (struct threadInfo *)threadId;
    int id = info->threadId;
    const char *baboonOrder = "B ";

    my_sleep(100); //simulate being idle for 1-100ms

    sem_wait(mutex);
    //for order checking
    // orderLeaving is shared by every thread, so it is only appended to
    // while the mutex is held, and never past the end of the buffer.
    if (strlen(orderLeaving) + strlen(baboonOrder) < sizeof orderLeaving) {
        strcat(orderLeaving, baboonOrder);
    }
    if ((xingDirection == aToB || xingDirection == none) && xingCount < 5 && (xedCount + xingCount) < 10) {
        xingDirection = aToB;
        xingCount++;
        printf("AtoB baboon (thread %d) got on the rope\n", id);
        sem_post(mutex);
    }
    else {
        toBWaitCount++;
        sem_post(mutex);
        sem_wait(toB);
        // Whoever posted toB did so without posting mutex, so the mutex is
        // handed over to this thread together with the wake-up.
        toBWaitCount--;
        xingCount++;
        xingDirection = aToB;
        printf("AtoB baboon (thread %d) got on the rope\n", id);
        sem_post(mutex);
    }

    //CROSSING

    sem_wait(mutex);
    printf("AtoB baboon (thread %d) got off the rope\n", id);
    xedCount++;
    xingCount--;
    if (toBWaitCount != 0 && (((xedCount+xingCount)<10) || ((xedCount+xingCount) >= 10 && toAWaitCount == 0))) {
        // Let the next same-direction baboon on; the mutex passes to it.
        sem_post(toB);
    }
    else if (xingCount == 0 && toAWaitCount != 0 && (toBWaitCount == 0 || (xedCount + xingCount)>=10)) {
        // Rope is empty: switch direction and hand the mutex to a BtoA waiter.
        xingDirection = bToA;
        xedCount = 0;
        sem_post(toA);
    }
    else {
        if (xingCount == 0 && toBWaitCount == 0 && toAWaitCount == 0) {
            // Nobody on the rope and nobody waiting: reset to idle.
            xingDirection = none;
            xedCount = 0;
        }
        sem_post(mutex);
    }
    // pthread_join collects this value, so it must be defined.
    return NULL;
}
/*
 * Thread body for a baboon crossing from side B to side A.
 *
 * threadId points to a struct threadInfo holding the id used in log output.
 * The thread idles for a random time, gets on the rope (or waits on toA
 * until a departing baboon lets it on), then gets off and decides who may
 * go next. Always returns NULL.
 */
void *toAThread(void *threadId) {
    struct threadInfo *info = (struct threadInfo *)threadId;
    int id = info->threadId;
    const char *baboonOrder = "A ";

    my_sleep(100);

    sem_wait(mutex);
    //for order checking
    // orderLeaving is shared by every thread, so it is only appended to
    // while the mutex is held, and never past the end of the buffer.
    if (strlen(orderLeaving) + strlen(baboonOrder) < sizeof orderLeaving) {
        strcat(orderLeaving, baboonOrder);
    }
    if ((xingDirection == bToA || xingDirection == none) && xingCount < 5 && (xedCount + xingCount) < 10) {
        xingDirection = bToA;
        xingCount++;
        printf("BtoA baboon (thread %d) got on the rope\n", id);
        sem_post(mutex);
    }
    else {
        toAWaitCount++;
        sem_post(mutex);
        sem_wait(toA);
        // Whoever posted toA did so without posting mutex, so the mutex is
        // handed over to this thread together with the wake-up.
        toAWaitCount--;
        xingCount++;
        xingDirection = bToA;
        printf("BtoA baboon (thread %d) got on the rope\n", id);
        sem_post(mutex);
    }

    //CROSSING

    sem_wait(mutex);
    printf("BtoA baboon (thread %d) got off the rope\n", id);
    xedCount++;
    xingCount--;
    if (toAWaitCount != 0 && (((xedCount+xingCount)<10) || ((xedCount+xingCount) >= 10 && toBWaitCount == 0))) {
        // Let the next same-direction baboon on; the mutex passes to it.
        sem_post(toA);
    }
    else if (xingCount == 0 && toBWaitCount != 0 && (toAWaitCount == 0 || (xedCount + xingCount)>=10)) {
        // Rope is empty: switch direction and hand the mutex to an AtoB waiter.
        xingDirection = aToB;
        xedCount = 0;
        sem_post(toB);
    }
    else {
        if (xingCount == 0 && toAWaitCount == 0 && toBWaitCount == 0) {
            // Nobody on the rope and nobody waiting: reset to idle.
            xingDirection = none;
            xedCount = 0;
        }
        sem_post(mutex);
    }
    // pthread_join collects this value, so it must be defined.
    return NULL;
}
//taken with permission from readers/writers problem
//Puts the calling thread to sleep to simulate both random start times and random workloads
/*
 * Sleeps for a random whole number of milliseconds in the range 1..limit.
 *
 * limit: upper bound in milliseconds; a value of 0 or less returns at once
 *        without sleeping (random() % 0 would otherwise divide by zero).
 * Exits the program if nanosleep reports an error.
 */
void my_sleep(int limit) {
    struct timespec time_ns;
    long duration;

    if (limit <= 0) {
        return;
    }
    duration = random() % limit + 1;
    // nanosleep requires tv_nsec below one second, so whole seconds go in
    // tv_sec; long arithmetic avoids overflowing int for large limits.
    time_ns.tv_sec = duration / 1000;
    time_ns.tv_nsec = (duration % 1000) * 1000000L;
    if (nanosleep(&time_ns, NULL) != 0) {
        perror("Nanosleep");
        exit(EXIT_FAILURE);
    }
}
/*
 * Removes any semaphore already registered under `name`, creates it again
 * with `startingValue`, and terminates the program if creation fails.
 *
 * NOTE(review): `result` is a by-value copy of the caller's pointer, so the
 * handle assigned below is not visible to the caller after this function
 * returns. Handing it back needs a sem_t ** parameter or a return value,
 * which means changing the prototype and every call site together.
 */
void sem_open_errorCheck(char *name, unsigned int startingValue, sem_t *result) {
    sem_unlink(name);
    result = sem_open(name, O_CREAT, 0600, startingValue);
    // sem_open reports failure with SEM_FAILED; that value is not -1 on
    // every platform, so comparing against -1 can miss the error.
    if (result == SEM_FAILED) {
        perror("sem_open error: semaphore failed to open correctly");
        exit(EXIT_FAILURE);
    }
}
How to debug stuff like this
The best way to debug this is to run it using the gdb debugger. Like this:
gdb my-monkey-program
(gdb) run
Program received signal SIGSEGV, Segmentation fault.
(gdb) info threads
(gdb) bt
Another excellent idea is to run it with valgrind:
valgrind ./my-monkey-program
which will tell you about invalid memory accesses and all sorts of things.
Your specific problem
gdb reports that the call stack is:
#0 sem_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S:45
#1 0x0000000000400e8d in toAThread (threadId=0x602160) at test.c:190
#2 0x00007ffff7bc4e9a in start_thread (arg=0x7fffed7e9700) at pthread_create.c:308
#3 0x00007ffff78f1cbd in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#4 0x0000000000000000 in ?? ()
Here are the line numbers from my compile:
187 baboonOrder = "A ";
188 strcat(orderLeaving, baboonOrder);
189
190 sem_wait(mutex);
This is because mutex is NULL.
Why it breaks
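You're never actually assigning to the mutex variable. C passes arguments by value, so the assignment inside sem_open_errorCheck only changes the function's local copy of the pointer. You're passing a pointer into sem_open_errorCheck, but what you really need to pass is a pointer-to-a-pointer: declare the parameter as sem_t **result, assign *result = sem_open(...), and call it with &mutex. Presumably the same applies to toA and toB.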
It's just luck that it worked on the Mac!